class BeaconServer(SCIONElement, metaclass=ABCMeta):
    """
    The SCION PathConstructionBeacon Server.

    Receives PCBs (Path Construction Beacons) from upstream, verifies and
    extends them with this AS's marking, and propagates them downstream.
    Also tracks interface liveness (via IFID keep-alives) and issues
    revocations, backed by a rotating connected hash tree. Shares state with
    sibling beacon servers in the same AS through ZooKeeper; only the
    ZK-lock master propagates/registers/revokes.

    Attributes:
        if2rev_tokens: Contains the currently used revocation token
            hash-chain for each interface.
    """
    SERVICE_TYPE = BEACON_SERVICE
    # Amount of time units a HOF is valid (time unit is EXP_TIME_UNIT).
    HOF_EXP_TIME = 63
    # ZK path for incoming PCBs
    ZK_PCB_CACHE_PATH = "pcb_cache"
    # ZK path for revocations.
    ZK_REVOCATIONS_PATH = "rev_cache"
    # Time revocation objects are cached in memory (in seconds).
    ZK_REV_OBJ_MAX_AGE = HASHTREE_EPOCH_TIME
    # Interval to check for timed out interfaces.
    IF_TIMEOUT_INTERVAL = 1

    def __init__(self, server_id, conf_dir):
        """
        :param str server_id: server identifier.
        :param str conf_dir: configuration directory.
        """
        super().__init__(server_id, conf_dir)
        # TODO: add 2 policies
        self.path_policy = PathPolicy.from_file(
            os.path.join(conf_dir, PATH_POLICY_FILE))
        self.signing_key = get_sig_key(self.conf_dir)
        # Keys derived from the AS master key: one for opaque-field MACs,
        # one for the revocation hash tree.
        self.of_gen_key = kdf(self.config.master_as_key, b"Derive OF Key")
        self.hashtree_gen_key = kdf(self.config.master_as_key,
                                    b"Derive hashtree Key")
        logging.info(self.config.__dict__)
        # Current hash tree, guarded by _hash_tree_lock; _next_tree is
        # precomputed by the _create_next_tree thread for the next TTL window.
        self._hash_tree = None
        self._hash_tree_lock = Lock()
        self._next_tree = None
        self._init_hash_tree()
        # Per-interface liveness state, guarded by ifid_state_lock.
        self.ifid_state = {}
        for ifid in self.ifid2br:
            self.ifid_state[ifid] = InterfaceState()
        self.ifid_state_lock = RLock()
        # Dispatch tables consumed by SCIONElement.handle_msg_meta.
        self.CTRL_PLD_CLASS_MAP = {
            PayloadClass.PCB: {
                None: self.handle_pcb
            },
            PayloadClass.IFID: {
                None: self.handle_ifid_packet
            },
            PayloadClass.CERT: {
                CertMgmtType.CERT_CHAIN_REQ: self.process_cert_chain_request,
                CertMgmtType.CERT_CHAIN_REPLY: self.process_cert_chain_reply,
                CertMgmtType.TRC_REPLY: self.process_trc_reply,
                CertMgmtType.TRC_REQ: self.process_trc_request,
            },
            PayloadClass.PATH: {
                PMT.IFSTATE_REQ: self._handle_ifstate_request,
                PMT.REVOCATION: self._handle_revocation,
            },
        }
        self.SCMP_PLD_CLASS_MAP = {
            SCMPClass.PATH: {
                SCMPPathClass.REVOKED_IF: self._handle_scmp_revocation,
            },
        }
        # Join the BS party in ZooKeeper and set up the shared caches used
        # to exchange PCBs and revocations with sibling beacon servers.
        zkid = ZkID.from_values(self.addr.isd_as, self.id,
                                [(self.addr.host, self._port)]).pack()
        self.zk = Zookeeper(self.addr.isd_as, BEACON_SERVICE, zkid,
                            self.topology.zookeepers)
        self.zk.retry("Joining party", self.zk.party_setup)
        self.pcb_cache = ZkSharedCache(self.zk, self.ZK_PCB_CACHE_PATH,
                                       self._handle_pcbs_from_zk)
        self.revobjs_cache = ZkSharedCache(self.zk, self.ZK_REVOCATIONS_PATH,
                                           self.process_rev_objects)
        # Locally-seen revocations; entries expire after one epoch plus
        # tolerance. Guarded by _rev_seg_lock.
        self.local_rev_cache = ExpiringDict(
            1000, HASHTREE_EPOCH_TIME + HASHTREE_EPOCH_TOLERANCE)
        self._rev_seg_lock = RLock()

    def _init_hash_tree(self):
        """Build the initial connected hash tree over all local interfaces."""
        ifs = list(self.ifid2br.keys())
        self._hash_tree = ConnectedHashTree(
            self.addr.isd_as, ifs, self.hashtree_gen_key, HashType.SHA256)

    def _get_ht_proof(self, if_id):
        """Return the hash-tree (revocation) proof for *if_id*."""
        with self._hash_tree_lock:
            return self._hash_tree.get_proof(if_id)

    def _get_ht_root(self):
        """Return the current hash-tree root."""
        with self._hash_tree_lock:
            return self._hash_tree.get_root()

    def propagate_downstream_pcb(self, pcb):
        """
        Propagates the beacon to all children.

        :param pcb: path segment.
        :type pcb: PathSegment
        :returns: dict mapping (isd_as, if_id) to the short ids of PCBs
            propagated over that interface.
        """
        propagated_pcbs = defaultdict(list)
        for intf in self.topology.child_interfaces:
            if not intf.to_if_id:
                # Remote IFID unknown (no keep-alive seen yet); skip.
                continue
            new_pcb, meta = self._mk_prop_pcb_meta(
                pcb.copy(), intf.isd_as, intf.if_id)
            if not new_pcb:
                continue
            self.send_meta(new_pcb, meta)
            propagated_pcbs[(intf.isd_as, intf.if_id)].append(pcb.short_id())
        return propagated_pcbs

    def _mk_prop_pcb_meta(self, pcb, dst_ia, egress_if):
        """
        Extend *pcb* with this AS's marking and sign it, returning the
        extended PCB and the metadata (one-hop path) needed to send it to
        the downstream BS. Returns (None, None) if no ASMarking could be
        created.
        """
        ts = pcb.get_timestamp()
        asm = self._create_asm(pcb.p.ifID, egress_if, ts, pcb.last_hof())
        if not asm:
            return None, None
        pcb.add_asm(asm)
        pcb.sign(self.signing_key)
        one_hop_path = self._create_one_hop_path(egress_if)
        return pcb, self._build_meta(ia=dst_ia, host=SVCType.BS_A,
                                     path=one_hop_path, one_hop=True)

    def _create_one_hop_path(self, egress_if):
        """Build a 2-hop path whose second hop field is left empty."""
        ts = int(SCIONTime.get_time())
        info = InfoOpaqueField.from_values(ts, self.addr.isd_as[0], hops=2)
        hf1 = HopOpaqueField.from_values(self.HOF_EXP_TIME, 0, egress_if)
        hf1.set_mac(self.of_gen_key, ts, None)
        # Return a path where second HF is empty.
        return SCIONPath.from_values(info, [hf1, HopOpaqueField()])

    def _mk_if_info(self, if_id):
        """
        Small helper method to make it easier to deal with ingress/egress
        interface being 0 while building ASMarkings.
        """
        d = {"remote_ia": ISD_AS.from_values(0, 0), "remote_if": 0, "mtu": 0}
        if not if_id:
            return d
        br = self.ifid2br[if_id]
        d["remote_ia"] = br.interfaces[if_id].isd_as
        d["remote_if"] = br.interfaces[if_id].to_if_id
        d["mtu"] = br.interfaces[if_id].mtu
        return d

    @abstractmethod
    def handle_pcbs_propagation(self):
        """
        Main loop to propagate received beacons.
        """
        raise NotImplementedError

    def _log_propagations(self, propagated_pcbs):
        """Log one debug line per (isd_as, if_id) propagation target."""
        for (isd_as, if_id), pcbs in propagated_pcbs.items():
            logging.debug("Propagated %d PCBs to %s via %s (%s)", len(pcbs),
                          isd_as, if_id, ", ".join(pcbs))

    def _handle_pcbs_from_zk(self, pcbs):
        """
        Handles cached pcbs through ZK, passed as a list.
        """
        for pcb in pcbs:
            try:
                pcb = PathSegment.from_raw(pcb)
            except SCIONParseError as e:
                logging.error("Unable to parse raw pcb: %s", e)
                continue
            self.handle_pcb(pcb)
        if pcbs:
            logging.debug("Processed %s PCBs from ZK", len(pcbs))

    def handle_pcb(self, pcb, meta=None):
        """
        Handles pcbs received from the network.

        :param pcb: the path segment.
        :param meta: sender metadata; None when the PCB came from ZK.
        """
        if meta:
            # Record the ingress interface the PCB arrived on.
            pcb.p.ifID = meta.path.get_hof().ingress_if
        try:
            self.path_policy.check_filters(pcb)
        except SCIONPathPolicyViolated as e:
            logging.debug("Segment dropped due to path policy: %s\n%s" %
                          (e, pcb.short_desc()))
            return
        if not self._filter_pcb(pcb):
            logging.debug("Segment dropped due to looping: %s" %
                          pcb.short_desc())
            return
        # Verification continues asynchronously; continue_seg_processing is
        # invoked once the segment is verified.
        seg_meta = PathSegMeta(pcb, self.continue_seg_processing, meta)
        self._process_path_seg(seg_meta)

    def continue_seg_processing(self, seg_meta):
        """
        For every verified pcb received from the network or ZK this function
        gets called to continue the processing for the pcb.
        """
        pcb = seg_meta.seg
        logging.debug("Successfully verified PCB %s", pcb.short_id())
        if seg_meta.meta:
            # Segment was received from network, not from zk. Share segment
            # with other beacon servers in this AS.
            entry_name = "%s-%s" % (pcb.get_hops_hash(hex=True), time.time())
            try:
                self.pcb_cache.store(entry_name, pcb.copy().pack())
            except ZkNoConnection:
                logging.error("Unable to store PCB in shared cache: "
                              "no connection to ZK")
        self.handle_ext(pcb)
        self._handle_verified_beacon(pcb)

    def _filter_pcb(self, pcb, dst_ia=None):
        """Hook for subclasses; the base implementation accepts every PCB."""
        return True

    def handle_ext(self, pcb):
        """
        Handle beacon extensions.
        """
        # Handle PCB extensions
        if pcb.is_sibra():
            logging.debug("%s", pcb.sibra_ext)
        for asm in pcb.iter_asms():
            pol = asm.routing_pol_ext()
            if pol:
                self.handle_routing_pol_ext(pol)

    def handle_routing_pol_ext(self, ext):
        # TODO(Sezer): Implement routing policy extension handling
        logging.debug("Routing policy extension: %s" % ext)

    @abstractmethod
    def register_segments(self):
        """
        Registers paths according to the received beacons.
        """
        raise NotImplementedError

    def _log_registrations(self, registrations, seg_type):
        """Log one debug line per registration destination."""
        for (dst_meta, dst_type), pcbs in registrations.items():
            logging.debug("Registered %d %s-segments @ %s:%s (%s)",
                          len(pcbs), seg_type, dst_type.upper(), dst_meta,
                          ", ".join(pcbs))

    def _create_asm(self, in_if, out_if, ts, prev_hof):
        """
        Create this AS's ASMarking for a PCB, or None if no PCBMarkings
        could be built (e.g. unknown remote interface).
        """
        pcbms = list(self._create_pcbms(in_if, out_if, ts, prev_hof))
        if not pcbms:
            return None
        chain = self._get_my_cert()
        _, cert_ver = chain.get_leaf_isd_as_ver()
        return ASMarking.from_values(
            self.addr.isd_as, self._get_my_trc().version, cert_ver, pcbms,
            self._get_ht_root(), self.topology.mtu)

    def _create_pcbms(self, in_if, out_if, ts, prev_hof):
        """
        Yield the up-segment PCBMarking followed by one peering PCBMarking
        per active peer interface. Yields nothing if the up marking cannot
        be built.
        """
        up_pcbm = self._create_pcbm(in_if, out_if, ts, prev_hof)
        if not up_pcbm:
            return
        yield up_pcbm
        for intf in sorted(self.topology.peer_interfaces):
            in_if = intf.if_id
            with self.ifid_state_lock:
                # Skip inactive peers, except during quiet startup.
                if (not self.ifid_state[in_if].is_active() and
                        not self._quiet_startup()):
                    continue
            peer_pcbm = self._create_pcbm(in_if, out_if, ts, up_pcbm.hof(),
                                          xover=True)
            if peer_pcbm:
                yield peer_pcbm

    def _create_pcbm(self, in_if, out_if, ts, prev_hof, xover=False):
        """
        Build a single PCBMarking with a MACed hop field, or None when the
        remote IFID of a non-zero remote AS is still unknown.
        """
        in_info = self._mk_if_info(in_if)
        if in_info["remote_ia"].int() and not in_info["remote_if"]:
            return None
        out_info = self._mk_if_info(out_if)
        if out_info["remote_ia"].int() and not out_info["remote_if"]:
            return None
        hof = HopOpaqueField.from_values(self.HOF_EXP_TIME, in_if, out_if,
                                         xover=xover)
        hof.set_mac(self.of_gen_key, ts, prev_hof)
        return PCBMarking.from_values(
            in_info["remote_ia"], in_info["remote_if"], in_info["mtu"],
            out_info["remote_ia"], out_info["remote_if"], hof)

    def _terminate_pcb(self, pcb):
        """
        Copies a PCB, terminates it and adds the segment ID.

        Terminating a PCB means adding a opaque field with the egress IF set
        to 0, i.e., there is no AS to forward a packet containing this path
        segment to.

        :returns: the terminated copy, or None if no ASMarking could be made.
        """
        pcb = pcb.copy()
        asm = self._create_asm(pcb.p.ifID, 0, pcb.get_timestamp(),
                               pcb.last_hof())
        if not asm:
            return None
        pcb.add_asm(asm)
        return pcb

    def handle_ifid_packet(self, pld, meta):
        """
        Update the interface state for the corresponding interface.

        :param pld: The IFIDPayload.
        :type pld: IFIDPayload
        :raises SCIONKeyError: if the payload names an unknown interface.
        """
        ifid = pld.p.relayIF
        with self.ifid_state_lock:
            if ifid not in self.ifid_state:
                raise SCIONKeyError("Invalid IF %d in IFIDPayload" % ifid)
            br = self.ifid2br[ifid]
            br.interfaces[ifid].to_if_id = pld.p.origIF
            prev_state = self.ifid_state[ifid].update()
            if prev_state == InterfaceState.INACTIVE:
                logging.info("IF %d activated", ifid)
            elif prev_state in [InterfaceState.TIMED_OUT,
                                InterfaceState.REVOKED]:
                logging.info("IF %d came back up.", ifid)
            if not prev_state == InterfaceState.ACTIVE:
                if self.zk.have_lock():
                    # Inform BRs about the interface coming up.
                    # NOTE(review): `pld` and `meta` below shadow the method
                    # parameters — confirm intentional.
                    state_info = IFStateInfo.from_values(
                        ifid, True, self._get_ht_proof(ifid))
                    pld = IFStatePayload.from_values([state_info])
                    for br in self.topology.border_routers:
                        br_addr, br_port = br.int_addrs[0].public[0]
                        meta = UDPMetadata.from_values(host=br_addr,
                                                       port=br_port)
                        self.send_meta(pld.copy(), meta, (br_addr, br_port))

    def run(self):
        """
        Run an instance of the Beacon Server.

        Starts the worker, interface-timeout, hash-tree-precompute and
        TRC/cert-request threads, then defers to the parent run loop.
        """
        threading.Thread(
            target=thread_safety_net, args=(self.worker,),
            name="BS.worker", daemon=True).start()
        # https://github.com/netsec-ethz/scion/issues/308:
        threading.Thread(
            target=thread_safety_net, args=(self._handle_if_timeouts,),
            name="BS._handle_if_timeouts", daemon=True).start()
        threading.Thread(
            target=thread_safety_net, args=(self._create_next_tree,),
            name="BS._create_next_tree", daemon=True).start()
        threading.Thread(
            target=thread_safety_net, args=(self._check_trc_cert_reqs,),
            name="Elem.check_trc_cert_reqs", daemon=True).start()
        super().run()

    def _create_next_tree(self):
        """
        Background loop: shortly before each TTL window ends, precompute
        the hash tree for the next window and stash it in _next_tree for
        _maintain_hash_tree to pick up.
        """
        last_ttl_window = 0
        while self.run_flag.is_set():
            start = time.time()
            cur_ttl_window = ConnectedHashTree.get_ttl_window()
            time_to_sleep = (ConnectedHashTree.get_time_till_next_ttl() -
                             HASHTREE_UPDATE_WINDOW)
            if cur_ttl_window == last_ttl_window:
                # Already computed a tree for this window; wait a full TTL.
                time_to_sleep += HASHTREE_TTL
            if time_to_sleep > 0:
                sleep_interval(start, time_to_sleep, "BS._create_next_tree",
                               self._quiet_startup())
            # at this point, there should be <= HASHTREE_UPDATE_WINDOW
            # seconds left in current ttl
            logging.info("Started computing hashtree for next TTL window (%d)",
                         cur_ttl_window + 2)
            last_ttl_window = ConnectedHashTree.get_ttl_window()
            ht_start = time.time()
            ifs = list(self.ifid2br.keys())
            tree = ConnectedHashTree.get_next_tree(
                self.addr.isd_as, ifs, self.hashtree_gen_key, HashType.SHA256)
            ht_end = time.time()
            with self._hash_tree_lock:
                self._next_tree = tree
            logging.info(
                "Finished computing hashtree for TTL window %d in %.3fs" %
                (cur_ttl_window + 2, ht_end - ht_start))

    def _maintain_hash_tree(self):
        """
        Maintain the hashtree. Update the the windows in the connected tree.

        Dies if _create_next_tree did not deliver a precomputed tree in time,
        since revocation proofs would become invalid.
        """
        with self._hash_tree_lock:
            if self._next_tree is not None:
                self._hash_tree.update(self._next_tree)
                self._next_tree = None
            else:
                logging.critical("Did not create hashtree in time; dying")
                kill_self()
        logging.info("New Hash Tree TTL window beginning: %s",
                     ConnectedHashTree.get_ttl_window())

    def worker(self):
        """
        Worker thread that takes care of reading shared PCBs from ZK, and
        propagating PCBS/registering paths when master.
        """
        last_propagation = last_registration = 0
        last_ttl_window = ConnectedHashTree.get_ttl_window()
        worker_cycle = 1.0
        start = time.time()
        while self.run_flag.is_set():
            sleep_interval(start, worker_cycle, "BS.worker cycle",
                           self._quiet_startup())
            start = time.time()
            try:
                self.zk.wait_connected()
                self.pcb_cache.process()
                self.revobjs_cache.process()
                self.handle_rev_objs()
                # Rotate the hash tree on TTL window boundaries.
                cur_ttl_window = ConnectedHashTree.get_ttl_window()
                if cur_ttl_window != last_ttl_window:
                    self._maintain_hash_tree()
                    last_ttl_window = cur_ttl_window
                ret = self.zk.get_lock(lock_timeout=0, conn_timeout=0)
                if not ret:  # Failed to get the lock
                    continue
                elif ret == ZK_LOCK_SUCCESS:
                    logging.info("Became master")
                    self._became_master()
                # Only the master expires shared-cache entries.
                self.pcb_cache.expire(self.config.propagation_time * 10)
                self.revobjs_cache.expire(self.ZK_REV_OBJ_MAX_AGE)
            except ZkNoConnection:
                continue
            now = time.time()
            if now - last_propagation >= self.config.propagation_time:
                self.handle_pcbs_propagation()
                last_propagation = now
            if (self.config.registers_paths and
                    now - last_registration >= self.config.registration_time):
                try:
                    self.register_segments()
                except SCIONKeyError as e:
                    logging.error("Error while registering segments: %s", e)
                    pass
                last_registration = now

    def _became_master(self):
        """
        Called when a BS becomes the new master. Resets some state that
        will be rebuilt over time.
        """
        # Reset all timed-out and revoked interfaces to inactive.
        with self.ifid_state_lock:
            for (_, ifstate) in self.ifid_state.items():
                if not ifstate.is_active():
                    ifstate.reset()

    def _get_my_trc(self):
        """Return the TRC of this AS's ISD from the trust store."""
        return self.trust_store.get_trc(self.addr.isd_as[0])

    def _get_my_cert(self):
        """Return this AS's certificate chain from the trust store."""
        return self.trust_store.get_cert(self.addr.isd_as)

    @abstractmethod
    def _handle_verified_beacon(self, pcb):
        """
        Once a beacon has been verified, place it into the right containers.

        :param pcb: verified path segment.
        :type pcb: PathSegment
        """
        raise NotImplementedError

    def process_rev_objects(self, rev_infos):
        """
        Processes revocation infos stored in Zookeeper.
        """
        with self._rev_seg_lock:
            for raw in rev_infos:
                try:
                    rev_info = RevocationInfo.from_raw(raw)
                except SCIONParseError as e:
                    logging.error(
                        "Error processing revocation info from ZK: %s", e)
                    continue
                self.local_rev_cache[rev_info] = rev_info.copy()

    def _issue_revocation(self, if_id):
        """
        Store a RevocationInfo in ZK and send a revocation to all BRs.

        :param if_id: The interface that needs to be revoked.
        :type if_id: int
        """
        # Only the master BS issues revocations.
        if not self.zk.have_lock():
            return
        rev_info = self._get_ht_proof(if_id)
        logging.info("Issuing revocation: %s", rev_info.short_desc())
        # Issue revocation to all BRs.
        info = IFStateInfo.from_values(if_id, False, rev_info)
        pld = IFStatePayload.from_values([info])
        for br in self.topology.border_routers:
            br_addr, br_port = br.int_addrs[0].public[0]
            meta = UDPMetadata.from_values(host=br_addr, port=br_port)
            self.send_meta(pld.copy(), meta, (br_addr, br_port))
        self._process_revocation(rev_info)
        self._send_rev_to_local_ps(rev_info)

    def _send_rev_to_local_ps(self, rev_info):
        """
        Sends the given revocation to its local path server.

        :param rev_info: The RevocationInfo object
        :type rev_info: RevocationInfo
        """
        if self.zk.have_lock() and self.topology.path_servers:
            try:
                addr, port = self.dns_query_topo(PATH_SERVICE)[0]
            except SCIONServiceLookupError:
                # If there are no local path servers, stop here.
                return
            meta = UDPMetadata.from_values(host=addr, port=port)
            self.send_meta(rev_info.copy(), meta)

    def _handle_scmp_revocation(self, pld, meta):
        """Handle a revocation delivered via SCMP."""
        rev_info = RevocationInfo.from_raw(pld.info.rev_info)
        logging.debug("Received revocation via SCMP: %s (from %s)",
                      rev_info.short_desc(), meta)
        # NOTE(review): unlike _handle_revocation, this path does not call
        # _validate_revocation before processing — confirm intentional.
        self._process_revocation(rev_info)

    def _handle_revocation(self, rev_info, meta):
        """Handle a revocation delivered via TCP/UDP ctrl payload."""
        logging.debug("Received revocation via TCP/UDP: %s (from %s)",
                      rev_info.short_desc(), meta)
        if not self._validate_revocation(rev_info):
            return
        self._process_revocation(rev_info)

    def handle_rev_objs(self):
        """Re-apply all cached revocations to the stored PCBs."""
        with self._rev_seg_lock:
            for rev_info in self.local_rev_cache.values():
                self._remove_revoked_pcbs(rev_info)

    def _process_revocation(self, rev_info):
        """
        Removes PCBs containing a revoked interface and sends the revocation
        to the local PS.

        :param rev_info: The RevocationInfo object
        :type rev_info: RevocationInfo
        """
        assert isinstance(rev_info, RevocationInfo)
        if_id = rev_info.p.ifID
        if not if_id:
            logging.error("Trying to revoke IF with ID 0.")
            return
        with self._rev_seg_lock:
            self.local_rev_cache[rev_info] = rev_info.copy()
        # Share the revocation with sibling BSes via ZK (best effort).
        rev_token = rev_info.copy().pack()
        entry_name = "%s:%s" % (hash(rev_token), time.time())
        try:
            self.revobjs_cache.store(entry_name, rev_token)
        except ZkNoConnection as exc:
            logging.error("Unable to store revocation in shared cache "
                          "(no ZK connection): %s" % exc)
        self._remove_revoked_pcbs(rev_info)

    @abstractmethod
    def _remove_revoked_pcbs(self, rev_info):
        """
        Removes the PCBs containing the revoked interface.

        :param rev_info: The RevocationInfo object.
        :type rev_info: RevocationInfo
        """
        raise NotImplementedError

    def _pcb_list_to_remove(self, candidates, rev_info):
        """
        Calculates the list of PCBs to remove.
        Called by _remove_revoked_pcbs.

        :param candidates: Candidate PCBs.
        :type candidates: List
        :param rev_info: The RevocationInfo object.
        :type rev_info: RevocationInfo
        :returns: list of candidate ids to remove (may contain an id twice,
            once per matching check — callers appear to tolerate this;
            TODO confirm).
        """
        to_remove = []
        processed = set()
        for cand in candidates:
            if cand.id in processed:
                continue
            processed.add(cand.id)
            # Skip revocations whose epoch is no longer (or not yet) valid.
            if not ConnectedHashTree.verify_epoch(rev_info.p.epoch):
                continue
            # If the interface on which we received the PCB is
            # revoked, then the corresponding pcb needs to be removed.
            root_verify = ConnectedHashTree.verify(rev_info,
                                                   self._get_ht_root())
            if (self.addr.isd_as == rev_info.isd_as() and
                    cand.pcb.p.ifID == rev_info.p.ifID and root_verify):
                to_remove.append(cand.id)
            for asm in cand.pcb.iter_asms():
                if self._verify_revocation_for_asm(rev_info, asm, False):
                    to_remove.append(cand.id)
        return to_remove

    def _handle_if_timeouts(self):
        """
        Periodically checks each interface state and issues an if revocation,
        if no keep-alive message was received for IFID_TOUT.
        """
        if_id_last_revoked = defaultdict(int)
        while self.run_flag.is_set():
            start_time = time.time()
            with self.ifid_state_lock:
                for (if_id, if_state) in self.ifid_state.items():
                    cur_epoch = ConnectedHashTree.get_current_epoch()
                    if not if_state.is_expired() or (
                            if_state.is_revoked() and
                            if_id_last_revoked[if_id] == cur_epoch):
                        # Either the interface hasn't timed out, or it's
                        # already revoked for this epoch
                        continue
                    if_id_last_revoked[if_id] = cur_epoch
                    if not if_state.is_revoked():
                        logging.info("IF %d went down.", if_id)
                    # Re-issue every epoch while the interface stays down.
                    self._issue_revocation(if_id)
                    if_state.revoke_if_expired()
            sleep_interval(start_time, self.IF_TIMEOUT_INTERVAL,
                           "Handle IF timeouts")

    def _handle_ifstate_request(self, req, meta):
        """
        Reply to an IFStateRequest with the state of the requested
        interface(s); master-only.
        """
        # Only master replies to ifstate requests.
        if not self.zk.have_lock():
            return
        assert isinstance(req, IFStateRequest)
        infos = []
        with self.ifid_state_lock:
            if req.p.ifID == IFStateRequest.ALL_INTERFACES:
                ifid_states = self.ifid_state.items()
            elif req.p.ifID in self.ifid_state:
                ifid_states = [(req.p.ifID, self.ifid_state[req.p.ifID])]
            else:
                logging.error(
                    "Received ifstate request from %s for unknown "
                    "interface %s.", meta, req.p.ifID)
                return
            for (ifid, state) in ifid_states:
                # Don't include inactive interfaces in response.
                if state.is_inactive():
                    continue
                info = IFStateInfo.from_values(ifid, state.is_active(),
                                               self._get_ht_proof(ifid))
                infos.append(info)
        if not infos and not self._quiet_startup():
            logging.warning("No IF state info to put in response. Req: %s" %
                            req.short_desc())
            return
        payload = IFStatePayload.from_values(infos)
        self.send_meta(payload, meta, (meta.host, meta.port))
class SCIONElement(object): """ Base class for the different kind of servers the SCION infrastructure provides. :ivar `Topology` topology: the topology of the AS as seen by the server. :ivar `Config` config: the configuration of the AS in which the server is located. :ivar dict ifid2br: map of interface ID to RouterElement. :ivar `SCIONAddr` addr: the server's address. """ SERVICE_TYPE = None STARTUP_QUIET_PERIOD = STARTUP_QUIET_PERIOD USE_TCP = False # Timeout for TRC or Certificate requests. TRC_CC_REQ_TIMEOUT = 3 def __init__(self, server_id, conf_dir, public=None, bind=None, spki_cache_dir=GEN_CACHE_PATH, prom_export=None): """ :param str server_id: server identifier. :param str conf_dir: configuration directory. :param list public: (host_addr, port) of the element's public address (i.e. the address visible to other network elements). :param list bind: (host_addr, port) of the element's bind address, if any (i.e. the address the element uses to identify itself to the local operating system, if it differs from the public address due to NAT). :param str spki_cache_dir: Path for caching TRCs and certificate chains. :param str prom_export: String of the form 'addr:port' specifying the prometheus endpoint. If no string is provided, no metrics are exported. """ self.id = server_id self.conf_dir = conf_dir self.ifid2br = {} self.topology = Topology.from_file( os.path.join(self.conf_dir, TOPO_FILE)) # Labels attached to every exported metric. 
self._labels = {"server_id": self.id, "isd_as": str(self.topology.isd_as)} # Must be over-ridden by child classes: self.CTRL_PLD_CLASS_MAP = {} self.SCMP_PLD_CLASS_MAP = {} self.public = public self.bind = bind if self.SERVICE_TYPE: own_config = self.topology.get_own_config(self.SERVICE_TYPE, server_id) if public is None: self.public = own_config.public if bind is None: self.bind = own_config.bind self.init_ifid2br() self.trust_store = TrustStore(self.conf_dir, spki_cache_dir, self.id, self._labels) self.total_dropped = 0 self._core_ases = defaultdict(list) # Mapping ISD_ID->list of core ASes self.init_core_ases() self.run_flag = threading.Event() self.run_flag.set() self.stopped_flag = threading.Event() self.stopped_flag.clear() self._in_buf = queue.Queue(MAX_QUEUE) self._socks = SocketMgr() self._startup = time.time() if self.USE_TCP: self._DefaultMeta = TCPMetadata else: self._DefaultMeta = UDPMetadata self.unverified_segs = ExpiringDict(500, 60 * 60) self.unv_segs_lock = threading.RLock() self.requested_trcs = {} self.req_trcs_lock = threading.Lock() self.requested_certs = {} self.req_certs_lock = threading.Lock() # TODO(jonghoonkwon): Fix me to setup sockets for multiple public addresses host_addr, self._port = self.public[0] self.addr = SCIONAddr.from_values(self.topology.isd_as, host_addr) if prom_export: self._export_metrics(prom_export) self._init_metrics() self._setup_sockets(True) lib_sciond.init(os.path.join(SCIOND_API_SOCKDIR, "sd%s.sock" % self.addr.isd_as)) def _load_as_conf(self): return Config.from_file(os.path.join(self.conf_dir, AS_CONF_FILE)) def _setup_sockets(self, init): """ Setup incoming socket and register with dispatcher """ self._tcp_sock = None self._tcp_new_conns = queue.Queue(MAX_QUEUE) # New TCP connections. if self._port is None: # No scion socket desired. return svc = SERVICE_TO_SVC_A.get(self.SERVICE_TYPE) # Setup TCP "accept" socket. 
self._setup_tcp_accept_socket(svc) # Setup UDP socket if self.bind: # TODO(jonghoonkwon): Fix me to setup socket for a proper bind address, # if the element has more than one bind addresses host_addr, b_port = self.bind[0] b_addr = SCIONAddr.from_values(self.topology.isd_as, host_addr) self._udp_sock = ReliableSocket( reg=(self.addr, self._port, init, svc), bind_ip=(b_addr, b_port)) else: self._udp_sock = ReliableSocket( reg=(self.addr, self._port, init, svc)) if not self._udp_sock.registered: self._udp_sock = None return if self._labels: CONNECTED_TO_DISPATCHER.labels(**self._labels).set(1) self._port = self._udp_sock.port self._socks.add(self._udp_sock, self.handle_recv) def _setup_tcp_accept_socket(self, svc): if not self.USE_TCP: return MAX_TRIES = 40 for i in range(MAX_TRIES): try: self._tcp_sock = SCIONTCPSocket() self._tcp_sock.setsockopt(SockOpt.SOF_REUSEADDR) self._tcp_sock.set_recv_tout(TCP_ACCEPT_POLLING_TOUT) self._tcp_sock.bind((self.addr, self._port), svc=svc) self._tcp_sock.listen() break except SCIONTCPError as e: logging.warning("TCP: Cannot connect to LWIP socket: %s" % e) time.sleep(1) # Wait for dispatcher else: logging.critical("TCP: cannot init TCP socket.") kill_self() def init_ifid2br(self): for br in self.topology.border_routers: for if_id in br.interfaces: self.ifid2br[if_id] = br def init_core_ases(self): """ Initializes dict of core ASes. 
""" for trc in self.trust_store.get_trcs(): self._core_ases[trc.isd] = trc.get_core_ases() def is_core_as(self, isd_as=None): if not isd_as: isd_as = self.addr.isd_as return isd_as in self._core_ases[isd_as[0]] def _update_core_ases(self, trc): """ When a new trc is received, this function is called to update the core ases map """ self._core_ases[trc.isd] = trc.get_core_ases() def get_border_addr(self, ifid): br = self.ifid2br[ifid] addr_idx = br.interfaces[ifid].addr_idx br_addr, br_port = br.int_addrs[addr_idx].public[0] return br_addr, br_port def handle_msg_meta(self, msg, meta): """ Main routine to handle incoming SCION messages. """ if isinstance(meta, SCMPMetadata): handler = self._get_scmp_handler(meta.pkt) else: handler = self._get_ctrl_handler(msg) if not handler: logging.error("handler not found: %s", msg) return try: # SIBRA operates on parsed packets. if (isinstance(meta, UDPMetadata) and msg.type() == PayloadClass.SIBRA): handler(meta.pkt) else: handler(msg, meta) except SCIONBaseError: log_exception("Error handling message:\n%s" % msg) def _check_trc_cert_reqs(self): check_cyle = 1.0 while self.run_flag.is_set(): start = time.time() self._check_cert_reqs() self._check_trc_reqs() sleep_interval(start, check_cyle, "Elem._check_trc_cert_reqs cycle") def _check_trc_reqs(self): """ Checks if TRC requests timeout and resends requests if so. 
""" with self.req_trcs_lock: now = time.time() for (isd, ver), (req_time, meta) in self.requested_trcs.items(): if now - req_time >= self.TRC_CC_REQ_TIMEOUT: trc_req = TRCRequest.from_values(isd, ver, cache_only=True) meta = meta or self._get_cs() req_id = mk_ctrl_req_id() logging.info("Re-Requesting TRC from %s: %s [id: %016x]", meta, trc_req.short_desc(), req_id) self.send_meta(CtrlPayload(CertMgmt(trc_req), req_id=req_id), meta) self.requested_trcs[(isd, ver)] = (time.time(), meta) if self._labels: PENDING_TRC_REQS_TOTAL.labels(**self._labels).set(len(self.requested_trcs)) def _check_cert_reqs(self): """ Checks if certificate requests timeout and resends requests if so. """ with self.req_certs_lock: now = time.time() for (isd_as, ver), (req_time, meta) in self.requested_certs.items(): if now - req_time >= self.TRC_CC_REQ_TIMEOUT: cert_req = CertChainRequest.from_values(isd_as, ver, cache_only=True) meta = meta or self._get_cs() req_id = mk_ctrl_req_id() logging.info("Re-Requesting CERTCHAIN from %s: %s [id: %016x]", meta, cert_req.short_desc(), req_id) self.send_meta(CtrlPayload(CertMgmt(cert_req), req_id=req_id), meta) self.requested_certs[(isd_as, ver)] = (time.time(), meta) if self._labels: PENDING_CERT_REQS_TOTAL.labels(**self._labels).set( len(self.requested_certs)) def _process_path_seg(self, seg_meta, req_id=None): """ When a pcb or path segment is received, this function is called to find missing TRCs and certs and request them. :param seg_meta: PathSegMeta object that contains pcb/path segment """ meta_str = str(seg_meta.meta) if seg_meta.meta else "ZK" req_str = "[id: %016x]" % req_id if req_id else "" logging.debug("Handling PCB from %s: %s %s", meta_str, seg_meta.seg.short_desc(), req_str) with self.unv_segs_lock: # Close the meta of the previous seg_meta, if there was one. 
prev_meta = self.unverified_segs.get(seg_meta.id) if prev_meta and prev_meta.meta: prev_meta.meta.close() self.unverified_segs[seg_meta.id] = seg_meta if self._labels: UNV_SEGS_TOTAL.labels(**self._labels).set(len(self.unverified_segs)) # Find missing TRCs and certificates missing_trcs = self._missing_trc_versions(seg_meta.trc_vers) missing_certs = self._missing_cert_versions(seg_meta.cert_vers) # Update missing TRCs/certs map seg_meta.missing_trcs.update(missing_trcs) seg_meta.missing_certs.update(missing_certs) # If all necessary TRCs/certs available, try to verify if seg_meta.verifiable(): self._try_to_verify_seg(seg_meta) return # Otherwise request missing trcs, certs self._request_missing_trcs(seg_meta) self._request_missing_certs(seg_meta) if seg_meta.meta: seg_meta.meta.close() def _try_to_verify_seg(self, seg_meta): """ If this pcb/path segment can be verified, call the function to process a verified pcb/path segment """ try: self._verify_path_seg(seg_meta) except SCIONVerificationError as e: logging.error("Signature verification failed for %s: %s" % (seg_meta.seg.short_id(), e)) return with self.unv_segs_lock: self.unverified_segs.pop(seg_meta.id, None) if self._labels: UNV_SEGS_TOTAL.labels(**self._labels).set(len(self.unverified_segs)) if seg_meta.meta: seg_meta.meta.close() seg_meta.callback(seg_meta) def _get_cs(self): """ Lookup certificate servers address and return meta. """ try: addr, port = self.dns_query_topo(CERTIFICATE_SERVICE)[0] except SCIONServiceLookupError as e: logging.warning("Lookup for certificate service failed: %s", e) return None return UDPMetadata.from_values(host=addr, port=port) def _request_missing_trcs(self, seg_meta): """ For all missing TRCs which are missing to verify this pcb/path segment, request them. Request is sent to certificate server, if the pcb/path segment was received by zk. Otherwise the sender of this pcb/path segment is asked. 
""" missing_trcs = set() with seg_meta.miss_trc_lock: missing_trcs = seg_meta.missing_trcs.copy() if not missing_trcs: return for isd, ver in missing_trcs: with self.req_trcs_lock: req_time, meta = self.requested_trcs.get((isd, ver), (None, None)) if meta: # There is already an outstanding request for the missing TRC # from somewhere else than than the local CS if seg_meta.meta: # Update the stored meta with the latest known server that has the TRC. self.requested_trcs[(isd, ver)] = (req_time, seg_meta.meta) continue if req_time and not seg_meta.meta: # There is already an outstanding request for the missing TRC # to the local CS and we don't have a new meta. continue trc_req = TRCRequest.from_values(isd, ver, cache_only=True) meta = seg_meta.meta or self._get_cs() if not meta: logging.error("Couldn't find a CS to request TRC for PCB %s", seg_meta.seg.short_id()) continue req_id = mk_ctrl_req_id() logging.info("Requesting %sv%s TRC from %s, for PCB %s [id: %016x]", isd, ver, meta, seg_meta.seg.short_id(), req_id) with self.req_trcs_lock: self.requested_trcs[(isd, ver)] = (time.time(), seg_meta.meta) if self._labels: PENDING_TRC_REQS_TOTAL.labels(**self._labels).set(len(self.requested_trcs)) self.send_meta(CtrlPayload(CertMgmt(trc_req), req_id=req_id), meta) def _request_missing_certs(self, seg_meta): """ For all missing CCs which are missing to verify this pcb/path segment, request them. Request is sent to certificate server, if the pcb/path segment was received by zk. Otherwise the sender of this pcb/path segment is asked. 
""" missing_certs = set() with seg_meta.miss_cert_lock: missing_certs = seg_meta.missing_certs.copy() if not missing_certs: return for isd_as, ver in missing_certs: with self.req_certs_lock: req_time, meta = self.requested_certs.get((isd_as, ver), (None, None)) if meta: # There is already an outstanding request for the missing cert # from somewhere else than than the local CS if seg_meta.meta: # Update the stored meta with the latest known server that has the cert. self.requested_certs[(isd_as, ver)] = (req_time, seg_meta.meta) continue if req_time and not seg_meta.meta: # There is already an outstanding request for the missing cert # to the local CS and we don't have a new meta. continue cert_req = CertChainRequest.from_values(isd_as, ver, cache_only=True) meta = seg_meta.meta or self._get_cs() if not meta: logging.error("Couldn't find a CS to request CERTCHAIN for PCB %s", seg_meta.seg.short_id()) continue req_id = mk_ctrl_req_id() logging.info("Requesting %sv%s CERTCHAIN from %s for PCB %s [id: %016x]", isd_as, ver, meta, seg_meta.seg.short_id(), req_id) with self.req_certs_lock: self.requested_certs[(isd_as, ver)] = (time.time(), seg_meta.meta) if self._labels: PENDING_CERT_REQS_TOTAL.labels(**self._labels).set(len(self.requested_certs)) self.send_meta(CtrlPayload(CertMgmt(cert_req), req_id=req_id), meta) def _missing_trc_versions(self, trc_versions): """ Check which intermediate trcs are missing and return their versions. 
:returns: the missing TRCs' :rtype set """ missing_trcs = set() for isd, versions in trc_versions.items(): # If not local TRC, only request versions contained in ASMarkings if isd is not self.topology.isd_as[0]: for ver in versions: if self.trust_store.get_trc(isd, ver) is None: missing_trcs.add((isd, ver)) continue # Local TRC max_req_ver = max(versions) max_local_ver = self.trust_store.get_trc(isd) lower_ver = 0 if max_local_ver is None: # This should never happen logging.critical("Local TRC not found!") kill_self() lower_ver = max_local_ver.version + 1 for ver in range(lower_ver, max_req_ver + 1): missing_trcs.add((isd, ver)) return missing_trcs def _missing_cert_versions(self, cert_versions): """ Check which and certificates are missing return their versions. :returns: the missing certs' versions :rtype set """ missing_certs = set() for isd_as, versions in cert_versions.items(): for ver in versions: if self.trust_store.get_cert(isd_as, ver) is None: missing_certs.add((isd_as, ver)) return missing_certs def process_trc_reply(self, cpld, meta): """ Process the TRC reply. :param rep: TRC reply. :type rep: TRCReply. 
""" meta.close() cmgt = cpld.union rep = cmgt.union assert isinstance(rep, TRCReply), type(rep) isd, ver = rep.trc.get_isd_ver() logging.info("TRC reply received for %sv%s from %s [id: %s]", isd, ver, meta, cpld.req_id_str()) self.trust_store.add_trc(rep.trc, True) # Update core ases for isd this trc belongs to max_local_ver = self.trust_store.get_trc(rep.trc.isd) if max_local_ver.version == rep.trc.version: self._update_core_ases(rep.trc) with self.req_trcs_lock: self.requested_trcs.pop((isd, ver), None) if self._labels: PENDING_TRC_REQS_TOTAL.labels(**self._labels).set(len(self.requested_trcs)) # Send trc to CS if meta.get_addr().isd_as != self.addr.isd_as: cs_meta = self._get_cs() self.send_meta(CtrlPayload(CertMgmt(rep)), cs_meta) cs_meta.close() # Remove received TRC from map self._check_segs_with_rec_trc(isd, ver) def _check_segs_with_rec_trc(self, isd, ver): """ When a trc reply is received, this method is called to check which segments can be verified. For all segments that can be verified, the processing is continued. 
""" with self.unv_segs_lock: for seg_meta in list(self.unverified_segs.values()): with seg_meta.miss_trc_lock: seg_meta.missing_trcs.discard((isd, ver)) # If all required trcs and certs are received if seg_meta.verifiable(): self._try_to_verify_seg(seg_meta) def process_trc_request(self, cpld, meta): """Process a TRC request.""" cmgt = cpld.union req = cmgt.union assert isinstance(req, TRCRequest), type(req) isd, ver = req.isd_as()[0], req.p.version logging.info("TRC request received for %sv%s from %s [id: %s]" % (isd, ver, meta, cpld.req_id_str())) trc = self.trust_store.get_trc(isd, ver) if trc: self.send_meta( CtrlPayload(CertMgmt(TRCReply.from_values(trc)), req_id=cpld.req_id), meta) else: logging.warning("Could not find requested TRC %sv%s [id: %s]" % (isd, ver, cpld.req_id_str())) def process_cert_chain_reply(self, cpld, meta): """Process a certificate chain reply.""" cmgt = cpld.union rep = cmgt.union assert isinstance(rep, CertChainReply), type(rep) meta.close() isd_as, ver = rep.chain.get_leaf_isd_as_ver() logging.info("Cert chain reply received for %sv%s from %s [id: %s]", isd_as, ver, meta, cpld.req_id_str()) self.trust_store.add_cert(rep.chain, True) with self.req_certs_lock: self.requested_certs.pop((isd_as, ver), None) if self._labels: PENDING_CERT_REQS_TOTAL.labels(**self._labels).set(len(self.requested_certs)) # Send cc to CS if meta.get_addr().isd_as != self.addr.isd_as: cs_meta = self._get_cs() self.send_meta(CtrlPayload(CertMgmt(rep)), cs_meta) cs_meta.close() # Remove received cert chain from map self._check_segs_with_rec_cert(isd_as, ver) def _check_segs_with_rec_cert(self, isd_as, ver): """ When a CC reply is received, this method is called to check which segments can be verified. For all segments that can be verified, the processing is continued. 
""" with self.unv_segs_lock: for seg_meta in list(self.unverified_segs.values()): with seg_meta.miss_cert_lock: seg_meta.missing_certs.discard((isd_as, ver)) # If all required trcs and certs are received. if seg_meta.verifiable(): self._try_to_verify_seg(seg_meta) def process_cert_chain_request(self, cpld, meta): """Process a certificate chain request.""" cmgt = cpld.union req = cmgt.union assert isinstance(req, CertChainRequest), type(req) isd_as, ver = req.isd_as(), req.p.version logging.info("Cert chain request received for %sv%s from %s [id: %s]" % (isd_as, ver, meta, cpld.req_id_str())) cert = self.trust_store.get_cert(isd_as, ver) if cert: self.send_meta( CtrlPayload(CertMgmt(CertChainReply.from_values(cert)), req_id=cpld.req_id), meta) else: logging.warning("Could not find requested certificate %sv%s [id: %s]" % (isd_as, ver, cpld.req_id_str())) def _verify_path_seg(self, seg_meta): """ Signature verification for all AS markings within this pcb/path segment. This function is called, when all TRCs and CCs used within this pcb/path segment are available. """ seg = seg_meta.seg exp_time = seg.get_expiration_time() for i, asm in enumerate(seg.iter_asms()): cert_ia = asm.isd_as() trc = self.trust_store.get_trc(cert_ia[0], asm.p.trcVer) chain = self.trust_store.get_cert(asm.isd_as(), asm.p.certVer) self._verify_exp_time(exp_time, chain) verify_chain_trc(cert_ia, chain, trc) seg.verify(chain.as_cert.subject_sig_key_raw, i) def _verify_exp_time(self, exp_time, chain): """ Verify that certificate chain cover the expiration time. 
:raises SCIONVerificationError """ # chain is only verifiable if TRC.exp_time >= CoreCert.exp_time >= LeafCert.exp_time if chain.as_cert.expiration_time < exp_time: raise SCIONVerificationError( "Certificate chain %sv%s expires before path segment" % chain.get_leaf_isd_as_ver()) def _get_ctrl_handler(self, msg): pclass = msg.type() try: type_map = self.CTRL_PLD_CLASS_MAP[pclass] except KeyError: logging.error("Control payload class not supported: %s\n%s", pclass, msg) return None ptype = msg.inner_type() try: return type_map[ptype] except KeyError: logging.error("%s control payload type not supported: %s\n%s", pclass, ptype, msg) return None def _get_scmp_handler(self, pkt): scmp = pkt.l4_hdr try: type_map = self.SCMP_PLD_CLASS_MAP[scmp.class_] except KeyError: logging.error("SCMP class not supported: %s(%s)\n%s", scmp.class_, SCMPClass.to_str(scmp.class_), pkt) return None try: return type_map[scmp.type] except KeyError: logging.error("SCMP %s type not supported: %s(%s)\n%s", scmp.type, scmp.class_, scmp_type_name(scmp.class_, scmp.type), pkt) return None def _parse_packet(self, packet): try: pkt = SCIONL4Packet(packet) except SCMPError as e: self._scmp_parse_error(packet, e) return None except SCIONBaseError: log_exception("Error parsing packet: %s" % hex_str(packet), level=logging.ERROR) return None try: pkt.validate(len(packet)) except SCMPError as e: self._scmp_validate_error(pkt, e) return None except SCIONChecksumFailed: logging.debug("Dropping packet due to failed checksum:\n%s", pkt) return pkt def _scmp_parse_error(self, packet, e): HDR_TYPE_OFFSET = 6 if packet[HDR_TYPE_OFFSET] == L4Proto.SCMP: # Ideally, never respond to an SCMP error with an SCMP error. # However, if parsing failed, we can (at best) only determine if # it's an SCMP packet, so just drop SCMP packets on parse error. logging.warning("Dropping SCMP packet due to parse error. %s", e) return # For now, none of these can be properly handled, so just log and drop # the packet. 
In the future, the "x Not Supported" errors might be # handlable in the case of deprecating old versions. DROP = SCMPBadVersion, SCMPBadSrcType, SCMPBadDstType assert isinstance(e, DROP), type(e) logging.warning("Dropping packet due to parse error: %s", e) def _scmp_validate_error(self, pkt, e): if pkt.cmn_hdr.next_hdr == L4Proto.SCMP and pkt.ext_hdrs[0].error: # Never respond to an SCMP error with an SCMP error. logging.info( "Dropping SCMP error packet due to validation error. %s", e) return if isinstance(e, (SCMPBadIOFOffset, SCMPBadHOFOffset)): # Can't handle normally, as the packet isn't reversible. reply = self._scmp_bad_path_metadata(pkt, e) else: logging.warning("Error: %s", type(e)) reply = pkt.reversed_copy() args = () if isinstance(e, SCMPUnspecified): args = (str(e),) elif isinstance(e, (SCMPOversizePkt, SCMPBadPktLen)): args = (e.args[1],) # the relevant MTU. elif isinstance(e, (SCMPTooManyHopByHop, SCMPBadExtOrder, SCMPBadHopByHop)): args = e.args if isinstance(e, SCMPBadExtOrder): # Delete the problematic extension. del reply.ext_hdrs[args[0]] reply.convert_to_scmp_error(self.addr, e.CLASS, e.TYPE, pkt, *args) if pkt.addrs.src.isd_as == self.addr.isd_as: # No path needed for a local reply. reply.path = SCIONPath() next_hop, port = self.get_first_hop(reply) reply.update() self.send(reply, next_hop, port) def _scmp_bad_path_metadata(self, pkt, e): """ Handle a packet with an invalid IOF/HOF offset in the common header. As the path can't be used, a response can only be sent if the source is local (as that doesn't require a path). """ if pkt.addrs.src.isd_as != self.addr.isd_as: logging.warning( "Invalid path metadata in packet from " "non-local source, dropping: %s\n%s\n%s\n%s", e, pkt.cmn_hdr, pkt.addrs, pkt.path) return reply = copy.deepcopy(pkt) # Remove existing path before reversing. 
reply.path = SCIONPath() reply.reverse() reply.convert_to_scmp_error(self.addr, e.CLASS, e.TYPE, pkt) reply.update() logging.warning( "Invalid path metadata in packet from " "local source, sending SCMP error: %s\n%s\n%s\n%s", e, pkt.cmn_hdr, pkt.addrs, pkt.path) return reply def get_first_hop(self, spkt): """ Returns first hop addr of down-path or end-host addr. """ return self._get_first_hop(spkt.path, spkt.addrs.dst, spkt.ext_hdrs) def _get_first_hop(self, path, dst, ext_hdrs=()): if_id = self._ext_first_hop(ext_hdrs) if if_id is None: if len(path) == 0: return self._empty_first_hop(dst) if_id = path.get_fwd_if() if if_id in self.ifid2br: return self.get_border_addr(if_id) logging.error("Unable to find first hop:\n%s", path) return None, None def _ext_first_hop(self, ext_hdrs): for hdr in ext_hdrs: if_id = hdr.get_next_ifid() if if_id is not None: return if_id def _empty_first_hop(self, dst): if dst.isd_as != self.addr.isd_as: logging.error("Packet to remote AS w/o path, dst: %s", dst) return None, None host = dst.host if host.TYPE == AddrType.SVC: host = self.dns_query_topo(SVC_TO_SERVICE[host.addr])[0][0] return host, SCION_UDP_EH_DATA_PORT def _build_packet(self, dst_host=None, path=None, ext_hdrs=(), dst_ia=None, payload=None, dst_port=0): if dst_host is None: dst_host = HostAddrNone() if dst_ia is None: dst_ia = self.addr.isd_as if path is None: path = SCIONPath() if payload is None: payload = PayloadRaw() dst_addr = SCIONAddr.from_values(dst_ia, dst_host) cmn_hdr, addr_hdr = build_base_hdrs(dst_addr, self.addr) udp_hdr = SCIONUDPHeader.from_values( self.addr, self._port, dst_addr, dst_port) return SCIONL4Packet.from_values( cmn_hdr, addr_hdr, path, ext_hdrs, udp_hdr, payload) def send(self, packet, dst, dst_port): """ Send *packet* to *dst* (to port *dst_port*) using the local socket. Calling ``packet.pack()`` should return :class:`bytes`, and ``dst.__str__()`` should return a string representing an IP address. 
:param packet: the packet to be sent to the destination. :param str dst: the destination IP address. :param int dst_port: the destination port number. """ assert not isinstance(packet.addrs.src.host, HostAddrNone), type(packet.addrs.src.host) assert not isinstance(packet.addrs.dst.host, HostAddrNone), type(packet.addrs.dst.host) assert isinstance(packet, SCIONBasePacket), type(packet) assert isinstance(dst_port, int), type(dst_port) if not self._udp_sock: return False return self._udp_sock.send(packet.pack(), (dst, dst_port)) def send_meta(self, msg, meta, next_hop_port=None): if isinstance(meta, TCPMetadata): assert not next_hop_port, next_hop_port return self._send_meta_tcp(msg, meta) elif isinstance(meta, SockOnlyMetadata): assert not next_hop_port, next_hop_port return meta.sock.send(msg) elif isinstance(meta, UDPMetadata): dst_port = meta.port else: logging.error("Unsupported metadata: %s" % meta.__name__) return False pkt = self._build_packet(meta.host, meta.path, meta.ext_hdrs, meta.ia, msg, dst_port) if not next_hop_port: next_hop_port = self.get_first_hop(pkt) if next_hop_port == (None, None): logging.error("Can't find first hop, dropping packet\n%s", pkt) return False return self.send(pkt, *next_hop_port) def _send_meta_tcp(self, msg, meta): if not meta.sock: tcp_sock = self._tcp_sock_from_meta(meta) meta.sock = tcp_sock self._tcp_conns_put(tcp_sock) return meta.sock.send_msg(msg.pack()) def _tcp_sock_from_meta(self, meta): assert meta.host dst = meta.get_addr() first_ip, first_port = self._get_first_hop(meta.path, dst) active = True try: # Create low-level TCP socket and connect sock = SCIONTCPSocket() sock.bind((self.addr, 0)) sock.connect(dst, meta.port, meta.path, first_ip, first_port, flags=meta.flags) except SCIONTCPError: log_exception("TCP: connection init error, marking socket inactive") sock = None active = False # Create and return TCPSocketWrapper return TCPSocketWrapper(sock, dst, meta.path, active) def _tcp_conns_put(self, sock): dropped = 0 
while True: try: self._tcp_new_conns.put(sock, block=False) except queue.Full: old_sock = self._tcp_new_conns.get_nowait() old_sock.close() logging.error("TCP: _tcp_new_conns is full. Closing old socket") dropped += 1 else: break if dropped > 0: logging.warning("%d TCP connection(s) dropped" % dropped) def run(self): """ Main routine to receive packets and pass them to :func:`handle_request()`. """ self._tcp_start() threading.Thread( target=thread_safety_net, args=(self.packet_recv,), name="Elem.packet_recv", daemon=True).start() try: self._packet_process() except SCIONBaseError: log_exception("Error processing packet.") finally: self.stop() def packet_put(self, packet, addr, sock): """ Try to put incoming packet in queue If queue is full, drop oldest packet in queue """ msg, meta = self._get_msg_meta(packet, addr, sock) if msg is None: return self._in_buf_put((msg, meta)) def _in_buf_put(self, item): dropped = 0 while True: try: self._in_buf.put(item, block=False) if self._labels: PKT_BUF_BYTES.labels(**self._labels).inc(len(item[0])) except queue.Full: msg, _ = self._in_buf.get_nowait() dropped += 1 if self._labels: PKTS_DROPPED_TOTAL.labels(**self._labels).inc() PKT_BUF_BYTES.labels(**self._labels).dec(len(msg)) else: break finally: if self._labels: PKT_BUF_TOTAL.labels(**self._labels).set(self._in_buf.qsize()) if dropped > 0: self.total_dropped += dropped logging.warning("%d packet(s) dropped (%d total dropped so far)", dropped, self.total_dropped) def _get_msg_meta(self, packet, addr, sock): pkt = self._parse_packet(packet) if not pkt: logging.error("Cannot parse packet:\n%s" % packet) return None, None # Create metadata: rev_pkt = pkt.reversed_copy() # Skip OneHopPathExt (if exists) exts = [] for e in rev_pkt.ext_hdrs: if not isinstance(e, OneHopPathExt): exts.append(e) if rev_pkt.l4_hdr.TYPE == L4Proto.UDP: meta = UDPMetadata.from_values(ia=rev_pkt.addrs.dst.isd_as, host=rev_pkt.addrs.dst.host, path=rev_pkt.path, ext_hdrs=exts, port=rev_pkt.l4_hdr.dst_port) 
elif rev_pkt.l4_hdr.TYPE == L4Proto.SCMP: meta = SCMPMetadata.from_values(ia=rev_pkt.addrs.dst.isd_as, host=rev_pkt.addrs.dst.host, path=rev_pkt.path, ext_hdrs=exts) else: logging.error("Cannot create meta for: %s" % pkt) return None, None # FIXME(PSz): for now it is needed by SIBRA service. meta.pkt = pkt try: pkt.parse_payload() except SCIONParseError as e: logging.error("Cannot parse payload\n Error: %s\n Pkt: %s", e, pkt) return None, meta return pkt.get_payload(), meta def handle_accept(self, sock): """ Callback to handle a ready listening socket """ s = sock.accept() if not s: logging.error("accept failed") return self._socks.add(s, self.handle_recv) def handle_recv(self, sock): """ Callback to handle a ready recving socket """ packet, addr = sock.recv() if packet is None: self._socks.remove(sock) sock.close() if sock == self._udp_sock: self._udp_sock = None if self._labels: CONNECTED_TO_DISPATCHER.labels(**self._labels).set(0) return self.packet_put(packet, addr, sock) def packet_recv(self): """ Read packets from sockets, and put them into a :class:`queue.Queue`. """ while self.run_flag.is_set(): if not self._udp_sock: self._setup_sockets(False) for sock, callback in self._socks.select_(timeout=0.1): callback(sock) self._tcp_socks_update() self._socks.close() self.stopped_flag.set() def _packet_process(self): """ Read packets from a :class:`queue.Queue`, and process them. 
""" while self.run_flag.is_set(): try: msg, meta = self._in_buf.get(timeout=1.0) if self._labels: PKT_BUF_BYTES.labels(**self._labels).dec(len(msg)) PKT_BUF_TOTAL.labels(**self._labels).set(self._in_buf.qsize()) self.handle_msg_meta(msg, meta) except queue.Empty: continue def _tcp_start(self): if not self.USE_TCP: return if not self._tcp_sock: logging.warning("TCP: accept socket is unset, port:%d", self._port) return threading.Thread( target=thread_safety_net, args=(self._tcp_accept_loop,), name="Elem._tcp_accept_loop", daemon=True).start() def _tcp_accept_loop(self): while self.run_flag.is_set(): try: logging.debug("TCP: waiting for connections") self._tcp_conns_put(TCPSocketWrapper(*self._tcp_sock.accept())) logging.debug("TCP: accepted connection") except SCIONTCPTimeout: pass except SCIONTCPError: log_exception("TCP: error on accept()") logging.error("TCP: leaving the accept loop") break try: self._tcp_sock.close() except SCIONTCPError: log_exception("TCP: error on closing _tcp_sock") def _tcp_socks_update(self): if not self.USE_TCP: return self._socks.remove_inactive() self._tcp_add_waiting() def _tcp_add_waiting(self): while True: try: self._socks.add(self._tcp_new_conns.get_nowait(), self._tcp_handle_recv) except queue.Empty: break def _tcp_handle_recv(self, sock): """ Callback to handle a ready recving socket """ msg, meta = sock.get_msg_meta() logging.debug("tcp_handle_recv:%s, %s", msg, meta) if msg is None and meta is None: self._socks.remove(sock) sock.close() return if msg: self._in_buf_put((msg, meta)) def _tcp_clean(self): if not hasattr(self, "_tcp_sock") or not self._tcp_sock: return # Close all TCP sockets. while not self._tcp_new_conns.empty(): try: tcp_sock = self._tcp_new_conns.get_nowait() except queue.Empty: break tcp_sock.close() def stop(self): """Shut down the daemon thread.""" # Signal that the thread should stop self.run_flag.clear() # Wait for the thread to finish self.stopped_flag.wait(5) # Close tcp sockets. 
        self._tcp_clean()

    def _quiet_startup(self):
        # True while the service is within its initial quiet period, used to
        # suppress noisy log output right after startup.
        return (time.time() - self._startup) < self.STARTUP_QUIET_PERIOD

    def dns_query_topo(self, qname):
        """
        Query dns for an answer. If the answer is empty, or an error occurs then
        return the relevant topology entries instead.

        :param str qname: Service to query for.
        :returns: list of (addr, port) tuples for the requested service.
        :raises SCIONServiceLookupError: if no servers are found.
        """
        assert qname in SERVICE_TYPES
        service_map = {
            BEACON_SERVICE: self.topology.beacon_servers,
            CERTIFICATE_SERVICE: self.topology.certificate_servers,
            PATH_SERVICE: self.topology.path_servers,
            SIBRA_SERVICE: self.topology.sibra_servers,
        }
        # Generate fallback from local topology
        results = []
        for srv in service_map[qname]:
            addr, port = srv.public[0]
            results.append((addr, port))
        # FIXME(kormat): replace with new discovery service when that's ready.
        if not results:
            # No results from local toplogy either
            raise SCIONServiceLookupError("No %s servers found" % qname)
        return results

    def _verify_revocation_for_asm(self, rev_info, as_marking,
                                   verify_all=True):
        """
        Verifies a revocation for a given AS marking.

        :param rev_info: The RevocationInfo object.
        :param as_marking: The ASMarking object.
        :param verify_all: If true, verify all PCBMs (including peers),
            otherwise only verify the up/down hop.
        :return: True, if the revocation successfully revokes an upstream
            interface in the AS marking, False otherwise.
        """
        if rev_info.isd_as() != as_marking.isd_as():
            return False
        if not ConnectedHashTree.verify(rev_info, as_marking.p.hashTreeRoot):
            logging.error("Revocation verification failed. %s", rev_info)
            return False
        for pcbm in as_marking.iter_pcbms():
            if rev_info.p.ifID in [pcbm.hof().ingress_if,
                                   pcbm.hof().egress_if]:
                return True
            # The first PCBM is the up/down hop; stop here unless peers are
            # to be checked as well.
            if not verify_all:
                break
        return False

    def _build_meta(self, ia=None, host=None, path=None, port=0, reuse=False,
                    one_hop=False):
        # Build reply metadata, defaulting to the local AS and an empty path.
        if ia is None:
            ia = self.addr.isd_as
        if path is None:
            path = SCIONPath()
        if not one_hop:
            return self._DefaultMeta.from_values(ia, host, path, port=port,
                                                 reuse=reuse)
        # One hop path extension in handled in a different way in TCP and UDP
        if self._DefaultMeta == TCPMetadata:
            return TCPMetadata.from_values(ia, host, path, port=port,
                                           reuse=reuse,
                                           flags=TCPFlags.ONEHOPPATH)
        return UDPMetadata.from_values(ia, host, path, port=port, reuse=reuse,
                                       ext_hdrs=[OneHopPathExt()])

    def _export_metrics(self, export_addr):
        """
        Starts an HTTP server endpoint for prometheus to scrape.

        :param str export_addr: "host:port" to bind the metrics endpoint to.
        """
        addr, port = export_addr.split(":")
        port = int(port)
        # Remove surrounding brackets from a bracketed (IPv6-style) address.
        addr = addr.strip("[]")
        logging.info("Exporting metrics on %s", export_addr)
        start_http_server(port, addr=addr)

    def _init_metrics(self):
        """
        Initializes all metrics to 0.

        Subclasses should initialize their metrics here and must call the
        super method.
        """
        PKT_BUF_TOTAL.labels(**self._labels).set(0)
        PKT_BUF_BYTES.labels(**self._labels).set(0)
        PKTS_DROPPED_TOTAL.labels(**self._labels).inc(0)
        UNV_SEGS_TOTAL.labels(**self._labels).set(0)
        PENDING_TRC_REQS_TOTAL.labels(**self._labels).set(0)
        PENDING_CERT_REQS_TOTAL.labels(**self._labels).set(0)
        CONNECTED_TO_DISPATCHER.labels(**self._labels).set(0)

    def _get_path_via_sciond(self, isd_as, flush=False):
        # Ask SCIOND for a path to isd_as, retrying until API_TOUT elapses.
        # NOTE(review): the loop retries immediately on SCIONDLibError with
        # no back-off — presumably acceptable for a local sciond socket;
        # confirm before reusing elsewhere.
        flags = lib_sciond.PathRequestFlags(flush=flush)
        start = time.time()
        while time.time() - start < API_TOUT:
            try:
                path_entries = lib_sciond.get_paths(isd_as, flags=flags)
            except lib_sciond.SCIONDLibError as e:
                logging.error("Error during path lookup: %s" % e)
                continue
            if path_entries:
                return path_entries[0].path()
        logging.warning("Unable to get path to %s from SCIOND.", isd_as)
        return None
class BeaconServer(SCIONElement, metaclass=ABCMeta): """ The SCION PathConstructionBeacon Server. Attributes: if2rev_tokens: Contains the currently used revocation token hash-chain for each interface. """ SERVICE_TYPE = BEACON_SERVICE # Amount of time units a HOF is valid (time unit is EXP_TIME_UNIT). HOF_EXP_TIME = 63 # Timeout for TRC or Certificate requests. REQUESTS_TIMEOUT = 10 # ZK path for incoming PCBs ZK_PCB_CACHE_PATH = "pcb_cache" # ZK path for revocations. ZK_REVOCATIONS_PATH = "rev_cache" # Time revocation objects are cached in memory (in seconds). ZK_REV_OBJ_MAX_AGE = HASHTREE_EPOCH_TIME # Interval to checked for timed out interfaces. IF_TIMEOUT_INTERVAL = 1 def __init__(self, server_id, conf_dir): """ :param str server_id: server identifier. :param str conf_dir: configuration directory. """ super().__init__(server_id, conf_dir) # TODO: add 2 policies self.path_policy = PathPolicy.from_file( os.path.join(conf_dir, PATH_POLICY_FILE)) self.unverified_beacons = deque() self.trc_requests = {} self.trcs = {} sig_key_file = get_sig_key_file_path(self.conf_dir) self.signing_key = base64.b64decode(read_file(sig_key_file)) self.of_gen_key = PBKDF2(self.config.master_as_key, b"Derive OF Key") self.hashtree_gen_key = PBKDF2(self.config.master_as_key, b"Derive hashtree Key") logging.info(self.config.__dict__) self._hash_tree = None self._hash_tree_lock = Lock() self._next_tree = None self._init_hash_tree() self.ifid_state = {} for ifid in self.ifid2br: self.ifid_state[ifid] = InterfaceState() self.ifid_state_lock = RLock() self.CTRL_PLD_CLASS_MAP = { PayloadClass.PCB: { None: self.handle_pcb }, PayloadClass.IFID: { None: self.handle_ifid_packet }, PayloadClass.CERT: { CertMgmtType.CERT_CHAIN_REPLY: self.process_cert_chain_rep, CertMgmtType.TRC_REPLY: self.process_trc_rep, }, PayloadClass.PATH: { PMT.IFSTATE_REQ: self._handle_ifstate_request, PMT.REVOCATION: self._handle_revocation, }, } self.SCMP_PLD_CLASS_MAP = { SCMPClass.PATH: { SCMPPathClass.REVOKED_IF: 
self._handle_scmp_revocation, }, } zkid = ZkID.from_values(self.addr.isd_as, self.id, [(self.addr.host, self._port)]).pack() self.zk = Zookeeper(self.addr.isd_as, BEACON_SERVICE, zkid, self.topology.zookeepers) self.zk.retry("Joining party", self.zk.party_setup) self.incoming_pcbs = deque() self.pcb_cache = ZkSharedCache(self.zk, self.ZK_PCB_CACHE_PATH, self.process_pcbs) self.revobjs_cache = ZkSharedCache(self.zk, self.ZK_REVOCATIONS_PATH, self.process_rev_objects) self.local_rev_cache = ExpiringDict( 1000, HASHTREE_EPOCH_TIME + HASHTREE_EPOCH_TOLERANCE) self.local_rev_cache_lock = Lock() def _init_hash_tree(self): ifs = list(self.ifid2br.keys()) self._hash_tree = ConnectedHashTree(self.addr.isd_as, ifs, self.hashtree_gen_key) def _get_ht_proof(self, if_id): with self._hash_tree_lock: return self._hash_tree.get_proof(if_id) def _get_ht_root(self): with self._hash_tree_lock: return self._hash_tree.get_root() def propagate_downstream_pcb(self, pcb): """ Propagates the beacon to all children. :param pcb: path segment. 
:type pcb: PathSegment """ for r in self.topology.child_border_routers: if not r.interface.to_if_id: continue new_pcb, meta = self._mk_prop_pcb_meta(pcb.copy(), r.interface.isd_as, r.interface.if_id) if not new_pcb: continue self.send_meta(new_pcb, meta) logging.info("Downstream PCB propagated to %s via IF %s", r.interface.isd_as, r.interface.if_id) def _mk_prop_pcb_meta(self, pcb, dst_ia, egress_if): ts = pcb.get_timestamp() asm = self._create_asm(pcb.p.ifID, egress_if, ts, pcb.last_hof()) if not asm: return None, None pcb.add_asm(asm) pcb.sign(self.signing_key) one_hop_path = self._create_one_hop_path(egress_if) if self.DefaultMeta == TCPMetadata: return pcb, self.DefaultMeta.from_values(ia=dst_ia, host=SVCType.BS_A, path=one_hop_path, flags=TCPFlags.ONEHOPPATH) return pcb, UDPMetadata.from_values(ia=dst_ia, host=SVCType.BS_A, path=one_hop_path, ext_hdrs=[OneHopPathExt()]) def _create_one_hop_path(self, egress_if): ts = int(SCIONTime.get_time()) info = InfoOpaqueField.from_values(ts, self.addr.isd_as[0], hops=2) hf1 = HopOpaqueField.from_values(self.HOF_EXP_TIME, 0, egress_if) hf1.set_mac(self.of_gen_key, ts, None) # Return a path where second HF is empty. return SCIONPath.from_values(info, [hf1, HopOpaqueField()]) def _mk_if_info(self, if_id): """ Small helper method to make it easier to deal with ingress/egress interface being 0 while building ASMarkings. """ d = {"remote_ia": ISD_AS.from_values(0, 0), "remote_if": 0, "mtu": 0} if not if_id: return d br = self.ifid2br[if_id] d["remote_ia"] = br.interface.isd_as d["remote_if"] = br.interface.to_if_id d["mtu"] = br.interface.mtu return d @abstractmethod def handle_pcbs_propagation(self): """ Main loop to propagate received beacons. 
""" raise NotImplementedError def handle_pcb(self, pcb, meta): """Receives beacon and stores it for processing.""" pcb.p.ifID = meta.path.get_hof().ingress_if if not self.path_policy.check_filters(pcb): return self.incoming_pcbs.append(pcb) meta.close() entry_name = "%s-%s" % (pcb.get_hops_hash(hex=True), time.time()) try: self.pcb_cache.store(entry_name, pcb.copy().pack()) except ZkNoConnection: logging.error("Unable to store PCB in shared cache: " "no connection to ZK") def handle_ext(self, pcb): """ Handle beacon extensions. """ # Handle PCB extensions: if pcb.is_sibra(): logging.debug("%s", pcb.sibra_ext) @abstractmethod def process_pcbs(self, pcbs, raw=True): """ Processes new beacons and appends them to beacon list. """ raise NotImplementedError def process_pcb_queue(self): pcbs = [] while self.incoming_pcbs: pcbs.append(self.incoming_pcbs.popleft()) self.process_pcbs(pcbs, raw=False) logging.debug("Processed %d pcbs from incoming queue", len(pcbs)) @abstractmethod def register_segments(self): """ Registers paths according to the received beacons. 
""" raise NotImplementedError def _create_asm(self, in_if, out_if, ts, prev_hof): pcbms = list(self._create_pcbms(in_if, out_if, ts, prev_hof)) if not pcbms: return None chain = self._get_my_cert() _, cert_ver = chain.get_leaf_isd_as_ver() return ASMarking.from_values(self.addr.isd_as, self._get_my_trc().version, cert_ver, pcbms, self._get_ht_root(), self.topology.mtu, chain) def _create_pcbms(self, in_if, out_if, ts, prev_hof): up_pcbm = self._create_pcbm(in_if, out_if, ts, prev_hof) if not up_pcbm: return yield up_pcbm for br in sorted(self.topology.peer_border_routers): in_if = br.interface.if_id with self.ifid_state_lock: if (not self.ifid_state[in_if].is_active() and not self._quiet_startup()): logging.warning('Peer ifid:%d inactive (not added).', in_if) continue peer_pcbm = self._create_pcbm(in_if, out_if, ts, up_pcbm.hof(), xover=True) if peer_pcbm: yield peer_pcbm def _create_pcbm(self, in_if, out_if, ts, prev_hof, xover=False): in_info = self._mk_if_info(in_if) if in_info["remote_ia"].int() and not in_info["remote_if"]: return None out_info = self._mk_if_info(out_if) if out_info["remote_ia"].int() and not out_info["remote_if"]: return None hof = HopOpaqueField.from_values(self.HOF_EXP_TIME, in_if, out_if, xover=xover) hof.set_mac(self.of_gen_key, ts, prev_hof) return PCBMarking.from_values(in_info["remote_ia"], in_info["remote_if"], in_info["mtu"], out_info["remote_ia"], out_info["remote_if"], hof) def _terminate_pcb(self, pcb): """ Copies a PCB, terminates it and adds the segment ID. Terminating a PCB means adding a opaque field with the egress IF set to 0, i.e., there is no AS to forward a packet containing this path segment to. """ pcb = pcb.copy() asm = self._create_asm(pcb.p.ifID, 0, pcb.get_timestamp(), pcb.last_hof()) if not asm: return None pcb.add_asm(asm) return pcb def handle_ifid_packet(self, pld, meta): """ Update the interface state for the corresponding interface. :param pld: The IFIDPayload. 
        :type pld: IFIDPayload
        """
        ifid = pld.p.relayIF
        with self.ifid_state_lock:
            if ifid not in self.ifid_state:
                raise SCIONKeyError("Invalid IF %d in IFIDPayload" % ifid)
            br = self.ifid2br[ifid]
            # Record the remote end of this interface as reported by the peer.
            br.interface.to_if_id = pld.p.origIF
            prev_state = self.ifid_state[ifid].update()
            if prev_state == InterfaceState.INACTIVE:
                logging.info("IF %d activated", ifid)
            elif prev_state in [InterfaceState.TIMED_OUT,
                                InterfaceState.REVOKED]:
                logging.info("IF %d came back up.", ifid)
            if not prev_state == InterfaceState.ACTIVE:
                # State transition to active: only the ZK master announces it.
                if self.zk.have_lock():
                    # Inform BRs about the interface coming up.
                    state_info = IFStateInfo.from_values(
                        ifid, True, self._get_ht_proof(ifid))
                    # NOTE(review): rebinding `pld` here shadows the incoming
                    # IFIDPayload parameter; it is not read again afterwards,
                    # but a distinct name would be clearer.
                    pld = IFStatePayload.from_values([state_info])
                    for br in self.topology.get_all_border_routers():
                        meta = UDPMetadata.from_values(host=br.addr,
                                                       port=br.port)
                        self.send_meta(pld.copy(), meta, (br.addr, br.port))

    def run(self):
        """
        Run an instance of the Beacon Server.

        Starts the worker, interface-timeout and hash-tree precomputation
        threads, then enters the parent element's main loop.
        """
        threading.Thread(
            target=thread_safety_net, args=(self.worker,),
            name="BS.worker", daemon=True).start()
        # https://github.com/netsec-ethz/scion/issues/308:
        threading.Thread(
            target=thread_safety_net, args=(self._handle_if_timeouts,),
            name="BS._handle_if_timeouts", daemon=True).start()
        threading.Thread(
            target=thread_safety_net, args=(self._create_next_tree,),
            name="BS._create_next_tree", daemon=True).start()
        super().run()

    def _create_next_tree(self):
        """
        Background loop: precompute the hash tree for the next TTL window.

        Sleeps until HASHTREE_UPDATE_WINDOW seconds before the next window
        starts, builds the successor tree, and stashes it in self._next_tree
        for _maintain_hash_tree() to install.
        """
        last_ttl_window = 0
        while self.run_flag.is_set():
            start = time.time()
            cur_ttl_window = ConnectedHashTree.get_ttl_window()
            time_to_sleep = (ConnectedHashTree.get_time_till_next_ttl() -
                             HASHTREE_UPDATE_WINDOW)
            if cur_ttl_window == last_ttl_window:
                # Already built a tree for this window; wait a full TTL more.
                time_to_sleep += HASHTREE_TTL
            if time_to_sleep > 0:
                sleep_interval(start, time_to_sleep, "BS._create_next_tree",
                               self._quiet_startup())
            # at this point, there should be <= HASHTREE_UPDATE_WINDOW
            # seconds left in current ttl
            logging.info("Started computing hashtree for next ttl")
            last_ttl_window = ConnectedHashTree.get_ttl_window()
            ifs = list(self.ifid2br.keys())
            tree = ConnectedHashTree.get_next_tree(self.addr.isd_as, ifs,
                                                   self.hashtree_gen_key)
            with self._hash_tree_lock:
                self._next_tree = tree

    def _maintain_hash_tree(self):
        """
        Maintain the hashtree. Update the windows in the connected tree.

        Installs the precomputed next tree; if the background thread failed
        to produce one in time, the server cannot issue valid proofs and
        terminates itself.
        """
        with self._hash_tree_lock:
            if self._next_tree is not None:
                self._hash_tree.update(self._next_tree)
                self._next_tree = None
            else:
                logging.critical("Did not create hashtree in time; dying")
                kill_self()
        logging.info("New Hash Tree TTL beginning")

    def worker(self):
        """
        Worker thread that takes care of reading shared PCBs from ZK, and
        propagating PCBS/registering paths when master.
        """
        last_propagation = last_registration = 0
        last_ttl_window = ConnectedHashTree.get_ttl_window()
        worker_cycle = 1.0
        was_master = False
        start = time.time()
        while self.run_flag.is_set():
            sleep_interval(start, worker_cycle, "BS.worker cycle",
                           self._quiet_startup())
            start = time.time()
            try:
                self.process_pcb_queue()
                self.handle_unverified_beacons()
                self.zk.wait_connected()
                self.pcb_cache.process()
                self.revobjs_cache.process()
                self.handle_rev_objs()
                cur_ttl_window = ConnectedHashTree.get_ttl_window()
                if cur_ttl_window != last_ttl_window:
                    # TTL window rolled over: swap in the precomputed tree.
                    self._maintain_hash_tree()
                    last_ttl_window = cur_ttl_window
                # Non-blocking lock attempt; only the master propagates,
                # registers and expires shared caches.
                if not self.zk.get_lock(lock_timeout=0, conn_timeout=0):
                    was_master = False
                    continue
                if not was_master:
                    self._became_master()
                was_master = True
                self.pcb_cache.expire(self.config.propagation_time * 10)
                self.revobjs_cache.expire(self.ZK_REV_OBJ_MAX_AGE)
            except ZkNoConnection:
                continue
            now = time.time()
            if now - last_propagation >= self.config.propagation_time:
                self.handle_pcbs_propagation()
                last_propagation = now
            if (self.config.registers_paths and
                    now - last_registration >= self.config.registration_time):
                try:
                    self.register_segments()
                except SCIONKeyError as e:
                    logging.error("Register_segments: %s", e)
                    # NOTE(review): `pass` after logging is redundant.
                    pass
                last_registration = now

    def _became_master(self):
        """
        Called when a BS becomes the new master. Resets some state that
        will be rebuilt over time.
        """
        # Reset all timed-out and revoked interfaces to inactive.
        with self.ifid_state_lock:
            for (_, ifstate) in self.ifid_state.items():
                if not ifstate.is_active():
                    ifstate.reset()

    def _try_to_verify_beacon(self, pcb, quiet=False):
        """
        Try to verify a beacon.

        If the required TRC/certificates are missing, the PCB is parked in
        self.unverified_beacons for a later retry.

        :param pcb: path segment to verify.
        :type pcb: PathSegment
        :param bool quiet: suppress the "missing cert/TRC" warning.
        """
        assert isinstance(pcb, PathSegment)
        asm = pcb.asm(-1)
        if self._check_trc(asm.isd_as(), asm.p.trcVer):
            if self._verify_beacon(pcb):
                self._handle_verified_beacon(pcb)
            else:
                logging.warning("Invalid beacon. %s", pcb)
        else:
            if not quiet:
                logging.warning("Certificate(s) or TRC missing for pcb: %s",
                                pcb.short_desc())
            self.unverified_beacons.append(pcb)

    @abstractmethod
    def _check_trc(self, isd_as, trc_ver):
        """
        Return True or False whether the necessary Certificate and TRC
        files are found.

        :param ISD_AS isd_is: ISD-AS identifier.
        :param int trc_ver: TRC file version.
        """
        raise NotImplementedError

    def _get_my_trc(self):
        # TRC of this AS's own ISD.
        return self.trust_store.get_trc(self.addr.isd_as[0])

    def _get_my_cert(self):
        # Certificate chain of this AS.
        return self.trust_store.get_cert(self.addr.isd_as)

    def _get_trc(self, isd_as, trc_ver):
        """
        Get TRC from local storage or memory.

        Returns None and (rate-limitedly) issues a request to a certificate
        server when the TRC is not available locally.

        :param ISD_AS isd_as: ISD-AS identifier.
        :param int trc_ver: TRC file version.
        """
        trc = self.trust_store.get_trc(isd_as[0], trc_ver)
        if not trc:
            # Requesting TRC file from cert server
            trc_tuple = isd_as[0], trc_ver
            now = int(time.time())
            # Re-request only if no request for this TRC is in flight or the
            # previous one has timed out.
            if (trc_tuple not in self.trc_requests or
                    (now - self.trc_requests[trc_tuple] >
                        self.REQUESTS_TIMEOUT)):
                trc_req = TRCRequest.from_values(isd_as, trc_ver)
                logging.info("Requesting %sv%s TRC", isd_as[0], trc_ver)
                try:
                    addr, port = self.dns_query_topo(CERTIFICATE_SERVICE)[0]
                except SCIONServiceLookupError as e:
                    logging.warning("Sending TRC request failed: %s", e)
                    return None
                meta = UDPMetadata.from_values(host=addr, port=port)
                self.send_meta(trc_req, meta)
                self.trc_requests[trc_tuple] = now
                return None
        return trc

    def _verify_beacon(self, pcb):
        """
        Once the necessary certificate and TRC files have been found,
        verify the beacons.

        :param pcb: path segment to verify.
        :type pcb: PathSegment
        :returns: True iff the signature of the last ASM verifies.
        """
        assert isinstance(pcb, PathSegment)
        asm = pcb.asm(-1)
        cert_ia = asm.isd_as()
        trc = self.trust_store.get_trc(cert_ia[0], asm.p.trcVer)
        return verify_sig_chain_trc(pcb.sig_pack(), asm.p.sig, str(cert_ia),
                                    asm.chain(), trc, asm.p.trcVer)

    @abstractmethod
    def _handle_verified_beacon(self, pcb):
        """
        Once a beacon has been verified, place it into the right
        containers.

        :param pcb: verified path segment.
        :type pcb: PathSegment
        """
        raise NotImplementedError

    @abstractmethod
    def process_cert_chain_rep(self, cert_chain_rep, meta):
        """
        Process the Certificate chain reply.
        """
        raise NotImplementedError

    def process_trc_rep(self, rep, meta):
        """
        Process the TRC reply.

        Stores the TRC and clears the corresponding pending-request entry.

        :param rep: TRC reply.
        :type rep: TRCReply
        """
        logging.info("TRC reply received for %s", rep.trc.get_isd_ver())
        self.trust_store.add_trc(rep.trc)
        rep_key = rep.trc.get_isd_ver()
        if rep_key in self.trc_requests:
            del self.trc_requests[rep_key]

    def handle_unverified_beacons(self):
        """
        Handle beacons which are waiting to be verified.

        Drains the current queue once; beacons that still cannot be verified
        are re-appended by _try_to_verify_beacon (quietly).
        """
        for _ in range(len(self.unverified_beacons)):
            pcb = self.unverified_beacons.popleft()
            self._try_to_verify_beacon(pcb, quiet=True)

    def process_rev_objects(self, rev_infos):
        """
        Processes revocation infos stored in Zookeeper.

        :param rev_infos: iterable of raw (packed) RevocationInfo blobs.
        """
        # NOTE(review): self.local_rev_cache_lock is not initialized in the
        # visible __init__ (which creates self._rev_seg_lock) — confirm it is
        # set up elsewhere.
        with self.local_rev_cache_lock:
            for raw in rev_infos:
                try:
                    rev_info = RevocationInfo.from_raw(raw)
                except SCIONParseError as e:
                    logging.error(
                        "Error processing revocation info from ZK: %s", e)
                    continue
                self.local_rev_cache[rev_info] = rev_info.copy()

    def _issue_revocation(self, if_id):
        """
        Store a RevocationInfo in ZK and send a revocation to all BRs.

        :param if_id: The interface that needs to be revoked.
        :type if_id: int
        """
        # Only the master BS issues revocations.
        if not self.zk.have_lock():
            return
        # The hash-tree proof for this interface doubles as the revocation.
        rev_info = self._get_ht_proof(if_id)
        logging.error("Issuing revocation for IF %d.", if_id)
        # Issue revocation to all BRs.
        info = IFStateInfo.from_values(if_id, False, rev_info)
        pld = IFStatePayload.from_values([info])
        for br in self.topology.get_all_border_routers():
            meta = UDPMetadata.from_values(host=br.addr, port=br.port)
            self.send_meta(pld.copy(), meta, (br.addr, br.port))
        self._process_revocation(rev_info)
        self._send_rev_to_local_ps(rev_info)

    def _send_rev_to_local_ps(self, rev_info):
        """
        Sends the given revocation to its local path server.

        Master-only; a failed service lookup is treated as "no local PS".

        :param rev_info: The RevocationInfo object
        :type rev_info: RevocationInfo
        """
        if self.zk.have_lock() and self.topology.path_servers:
            try:
                addr, port = self.dns_query_topo(PATH_SERVICE)[0]
            except SCIONServiceLookupError:
                # If there are no local path servers, stop here.
                return
            logging.info("Sending revocation to local PS.")
            meta = UDPMetadata.from_values(host=addr, port=port)
            self.send_meta(rev_info.copy(), meta)

    def _handle_scmp_revocation(self, pld, meta):
        # Revocation delivered via an SCMP error packet; unpack and process.
        rev_info = RevocationInfo.from_raw(pld.info.rev_info)
        logging.info("Received revocation via SCMP:\n%s",
                     rev_info.short_desc())
        self._process_revocation(rev_info)

    def _handle_revocation(self, rev_info, meta):
        # Revocation delivered via the control plane; validate then process.
        logging.info("Received revocation via TCP/UDP:\n%s",
                     rev_info.short_desc())
        if not self._validate_revocation(rev_info):
            return
        self._process_revocation(rev_info)

    def handle_rev_objs(self):
        # Re-apply all cached revocations to stored PCBs.
        with self.local_rev_cache_lock:
            for rev_info in self.local_rev_cache.values():
                self._remove_revoked_pcbs(rev_info)

    def _process_revocation(self, rev_info):
        """
        Removes PCBs containing a revoked interface and sends the
        revocation to the local PS.

        :param rev_info: The RevocationInfo object
        :type rev_info: RevocationInfo
        """
        assert isinstance(rev_info, RevocationInfo)
        if_id = rev_info.p.ifID
        if not if_id:
            # IF ID 0 is reserved/invalid; never revoke it.
            logging.error("Trying to revoke IF with ID 0.")
            return
        with self.local_rev_cache_lock:
            self.local_rev_cache[rev_info] = rev_info.copy()
        logging.info("Storing revocation in ZK.")
        rev_token = rev_info.copy().pack()
        # Entry name combines a content hash with a timestamp for uniqueness.
        entry_name = "%s:%s" % (hash(rev_token), time.time())
        try:
            self.revobjs_cache.store(entry_name, rev_token)
        except ZkNoConnection as exc:
            # Best-effort: revocation is still applied locally below.
            logging.error("Unable to store revocation in shared cache "
                          "(no ZK connection): %s" % exc)
        self._remove_revoked_pcbs(rev_info)

    @abstractmethod
    def _remove_revoked_pcbs(self, rev_info):
        """
        Removes the PCBs containing the revoked interface.

        :param rev_info: The RevocationInfo object.
        :type rev_info: RevocationInfo
        """
        raise NotImplementedError

    def _pcb_list_to_remove(self, candidates, rev_info):
        """
        Calculates the list of PCBs to remove.
        Called by _remove_revoked_pcbs.

        :param candidates: Candidate PCBs.
        :type candidates: List
        :param rev_info: The RevocationInfo object.
        :type rev_info: RevocationInfo
        :returns: list of candidate ids to remove (may contain an id twice
            if both checks below match).
        """
        to_remove = []
        processed = set()
        for cand in candidates:
            if cand.id in processed:
                continue
            processed.add(cand.id)
            # Stale revocations (wrong epoch) are ignored.
            if not ConnectedHashTree.verify_epoch(rev_info.p.epoch):
                continue
            # If the interface on which we received the PCB is
            # revoked, then the corresponding pcb needs to be removed, if
            # the proof can be verified with the own AS's root for the current
            # epoch and the if_id of the interface on which pcb was received
            # matches that in the rev_info
            root_verify = ConnectedHashTree.verify(rev_info,
                                                   self._get_ht_root())
            if (self.addr.isd_as == rev_info.isd_as() and
                    cand.pcb.p.ifID == rev_info.p.ifID and root_verify):
                to_remove.append(cand.id)
            # Also remove any PCB whose ASMs carry the revoked interface.
            for asm in cand.pcb.iter_asms():
                if self._verify_revocation_for_asm(rev_info, asm, False):
                    to_remove.append(cand.id)
        return to_remove

    def _handle_if_timeouts(self):
        """
        Periodically checks each interface state and issues an if
        revocation, if no keep-alive message was received for IFID_TOUT.
        """
        # Tracks the last epoch in which each IF was revoked, so an IF is
        # revoked at most once per epoch.
        if_id_last_revoked = defaultdict(int)
        while self.run_flag.is_set():
            start_time = time.time()
            with self.ifid_state_lock:
                for (if_id, if_state) in self.ifid_state.items():
                    cur_epoch = ConnectedHashTree.get_current_epoch()
                    # Check if interface has timed-out.
                    if ((if_state.is_expired() or if_state.is_revoked()) and
                            (if_id_last_revoked[if_id] != cur_epoch)):
                        if_id_last_revoked[if_id] = cur_epoch
                        if not if_state.is_revoked():
                            logging.info("IF %d appears to be down.", if_id)
                        self._issue_revocation(if_id)
                        if_state.revoke_if_expired()
            sleep_interval(start_time, self.IF_TIMEOUT_INTERVAL,
                           "Handle IF timeouts")

    def _handle_ifstate_request(self, req, meta):
        """
        Reply to an interface-state request with the states of the requested
        (or all) active interfaces. Master-only.
        """
        # Only master replies to ifstate requests.
        if not self.zk.have_lock():
            return
        assert isinstance(req, IFStateRequest)
        logging.debug("Received ifstate req:\n%s", req)
        infos = []
        with self.ifid_state_lock:
            if req.p.ifID == IFStateRequest.ALL_INTERFACES:
                ifid_states = self.ifid_state.items()
            elif req.p.ifID in self.ifid_state:
                ifid_states = [(req.p.ifID, self.ifid_state[req.p.ifID])]
            else:
                logging.error("Received ifstate request from %s for unknown "
                              "interface %s.", meta.get_addr(), req.p.ifID)
                return
            for (ifid, state) in ifid_states:
                # Don't include inactive interfaces in response.
                if state.is_inactive():
                    continue
                info = IFStateInfo.from_values(ifid, state.is_active(),
                                               self._get_ht_proof(ifid))
                infos.append(info)
        if not infos and not self._quiet_startup():
            logging.warning("No IF state info to put in response.")
            return
        payload = IFStatePayload.from_values(infos)
        self.send_meta(payload, meta, (meta.host, meta.port))
class PathServer(SCIONElement, metaclass=ABCMeta):
    """
    The SCION Path Server.

    Abstract base for local and core path servers: stores path segments,
    answers path requests, and distributes revocations via ZooKeeper.
    """
    SERVICE_TYPE = PATH_SERVICE
    # Max number of path segments stored per resolution.
    MAX_SEG_NO = 5  # TODO: replace by config variable.
    # ZK path for incoming PATHs
    ZK_PATH_CACHE_PATH = "path_cache"
    # ZK path for incoming REVs
    ZK_REV_CACHE_PATH = "rev_cache"
    # Max number of segments per propagation packet
    PROP_LIMIT = 5
    # Max number of segments per ZK cache entry
    ZK_SHARE_LIMIT = 10
    # Time to store revocations in zookeeper
    ZK_REV_OBJ_MAX_AGE = HASHTREE_EPOCH_TIME

    def __init__(self, server_id, conf_dir):
        """
        :param str server_id: server identifier.
        :param str conf_dir: configuration directory.
        """
        super().__init__(server_id, conf_dir)
        self.down_segments = PathSegmentDB(max_res_no=self.MAX_SEG_NO)
        self.core_segments = PathSegmentDB(max_res_no=self.MAX_SEG_NO)
        self.pending_req = defaultdict(list)  # Dict of pending requests.
        # Used when l/cPS doesn't have up/dw-path.
        self.waiting_targets = defaultdict(list)
        # Recently seen revocations, expiring after one hash-tree epoch.
        self.revocations = ExpiringDict(1000, HASHTREE_EPOCH_TIME)
        # Contains PCBs that include revocations.
        self.pcb_cache = ExpiringDict(100, HASHTREE_EPOCH_TIME)
        self.pcb_cache_lock = Lock()
        # A mapping from (hash tree root of AS, IFID) to segments
        self.htroot_if2seg = ExpiringDict(1000, HASHTREE_TTL)
        self.htroot_if2seglock = Lock()
        # Dispatch tables: payload class/type -> handler.
        self.CTRL_PLD_CLASS_MAP = {
            PayloadClass.PATH: {
                PMT.REQUEST: self.path_resolution,
                PMT.REPLY: self.handle_path_segment_record,
                PMT.REG: self.handle_path_segment_record,
                PMT.REVOCATION: self._handle_revocation,
                PMT.SYNC: self.handle_path_segment_record,
            },
        }
        self.SCMP_PLD_CLASS_MAP = {
            SCMPClass.PATH: {
                SCMPPathClass.REVOKED_IF: self._handle_scmp_revocation,
            },
        }
        # Outgoing queues drained by _propagate_and_sync().
        self._segs_to_zk = deque()
        self._revs_to_zk = deque()
        self._zkid = ZkID.from_values(self.addr.isd_as, self.id,
                                      [(self.addr.host, self._port)])
        self.zk = Zookeeper(self.topology.isd_as, PATH_SERVICE,
                            self._zkid.copy().pack(),
                            self.topology.zookeepers)
        self.zk.retry("Joining party", self.zk.party_setup)
        self.path_cache = ZkSharedCache(self.zk, self.ZK_PATH_CACHE_PATH,
                                        self._cached_entries_handler)
        self.rev_cache = ZkSharedCache(self.zk, self.ZK_REV_CACHE_PATH,
                                       self._rev_entries_handler)

    def worker(self):
        """
        Worker thread that takes care of reading shared paths from ZK, and
        handling master election for core servers.
        """
        worker_cycle = 1.0
        start = SCIONTime.get_time()
        was_master = False
        while self.run_flag.is_set():
            sleep_interval(start, worker_cycle, "cPS.worker cycle",
                           self._quiet_startup())
            start = SCIONTime.get_time()
            try:
                self.zk.wait_connected()
                self.path_cache.process()
                self.rev_cache.process()
                # Try to become a master.
                is_master = self.zk.get_lock(lock_timeout=0, conn_timeout=0)
                if is_master:
                    if not was_master:
                        logging.info("Became master")
                    # Only the master expires the shared caches.
                    self.path_cache.expire(self.config.propagation_time * 10)
                    self.rev_cache.expire(self.ZK_REV_OBJ_MAX_AGE)
                    was_master = True
                else:
                    was_master = False
            except ZkNoConnection:
                logging.warning('worker(): ZkNoConnection')
                # NOTE(review): `pass` after logging is redundant; the loop
                # still falls through to _update_master() below.
                pass
            self._update_master()
            self._propagate_and_sync()

    def _cached_entries_handler(self, raw_entries):
        """
        Handles cached through ZK entries, passed as a list.

        :param raw_entries: iterable of packed PathSegmentRecords blobs.
        """
        count = 0
        for raw in raw_entries:
            recs = PathSegmentRecords.from_raw(raw)
            for type_, pcb in recs.iter_pcbs():
                count += 1
                self._dispatch_segment_record(type_, pcb, from_zk=True)
        if count:
            logging.debug("Processed %s PCBs from ZK", count)

    def _update_master(self):
        # Hook for subclasses; base implementation does nothing.
        pass

    def _rev_entries_handler(self, raw_entries):
        # Apply revocations shared by other path servers through ZK.
        for raw in raw_entries:
            rev_info = RevocationInfo.from_raw(raw)
            self._remove_revoked_segments(rev_info)

    def _add_rev_mappings(self, pcb):
        """
        Add if revocation token to segment ID mappings.

        Maps (hash-tree root, ingress/egress IFID) of every ASM to the
        segment's id, so revocations can find affected segments quickly.
        """
        segment_id = pcb.get_hops_hash()
        with self.htroot_if2seglock:
            for asm in pcb.iter_asms():
                hof = asm.pcbm(0).hof()
                egress_h = (asm.p.hashTreeRoot, hof.egress_if)
                self.htroot_if2seg.setdefault(egress_h, set()).add(segment_id)
                ingress_h = (asm.p.hashTreeRoot, hof.ingress_if)
                self.htroot_if2seg.setdefault(ingress_h, set()).add(
                    segment_id)

    @abstractmethod
    def _handle_up_segment_record(self, pcb, **kwargs):
        # Subclass hook: store/forward an up-segment.
        raise NotImplementedError

    @abstractmethod
    def _handle_down_segment_record(self, pcb, **kwargs):
        # Subclass hook: store/forward a down-segment.
        raise NotImplementedError

    @abstractmethod
    def _handle_core_segment_record(self, pcb, **kwargs):
        # Subclass hook: store/forward a core-segment.
        raise NotImplementedError

    def _add_segment(self, pcb, seg_db, name, reverse=False):
        """
        Insert/update a segment in `seg_db`.

        :returns: True if a new entry was added, False if an existing entry
            was updated; implicitly None when the DB reports neither.
        """
        res = seg_db.update(pcb, reverse=reverse)
        if res == DBResult.ENTRY_ADDED:
            self._add_rev_mappings(pcb)
            logging.info("%s-Segment registered: %s", name, pcb.short_desc())
            return True
        elif res == DBResult.ENTRY_UPDATED:
            self._add_rev_mappings(pcb)
            logging.debug("%s-Segment updated: %s", name, pcb.short_desc())
        return False

    def _handle_scmp_revocation(self, pld, meta):
        # Unpack an SCMP-carried revocation and hand it to the main handler.
        rev_info = RevocationInfo.from_raw(pld.info.rev_info)
        self._handle_revocation(rev_info, meta)

    def _handle_revocation(self, rev_info, meta):
        """
        Handles a revocation of a segment, interface or hop.

        :param rev_info: The RevocationInfo object.
        :param meta: metadata of the message carrying the revocation.
        """
        assert isinstance(rev_info, RevocationInfo)
        if not self._validate_revocation(rev_info):
            return
        if meta.ia[0] != self.addr.isd_as[0]:
            # Revocations are only accepted from within our own ISD.
            logging.info("Dropping revocation received from a different ISD.")
            return
        if rev_info in self.revocations:
            logging.debug("Already received revocation. Dropping...")
            # NOTE(review): returns False here vs bare None elsewhere; no
            # visible caller uses the return value.
            return False
        self.revocations[rev_info] = True
        logging.debug("Received revocation from %s:\n%s", meta.get_addr(),
                      rev_info)
        self._revs_to_zk.append(rev_info.copy().pack())  # have to pack copy
        # Remove segments that contain the revoked interface.
        self._remove_revoked_segments(rev_info)
        # Update revocations for PCBs in the the PCB cache.
        with self.pcb_cache_lock:
            for segment in self.pcb_cache.values():
                segment.add_rev_infos([rev_info.copy()])
        # Forward revocation to other path servers.
        self._forward_revocation(rev_info, meta)

    def _remove_revoked_segments(self, rev_info):
        """
        Try the previous and next hashes as possible astokens, and delete
        any segment that matches

        :param rev_info: The revocation info
        :type rev_info: RevocationInfo
        """
        if not ConnectedHashTree.verify_epoch(rev_info.p.epoch):
            return
        (hash01, hash12) = ConnectedHashTree.get_possible_hashes(rev_info)
        if_id = rev_info.p.ifID
        with self.htroot_if2seglock:
            down_segs_removed = 0
            core_segs_removed = 0
            up_segs_removed = 0
            for h in (hash01, hash12):
                for sid in self.htroot_if2seg.pop((h, if_id), []):
                    if self.down_segments.delete(
                            sid) == DBResult.ENTRY_DELETED:
                        down_segs_removed += 1
                    if self.core_segments.delete(
                            sid) == DBResult.ENTRY_DELETED:
                        core_segs_removed += 1
                    if not self.topology.is_core_as:
                        # NOTE(review): self.up_segments is not created in
                        # the visible __init__ — presumably added by the
                        # non-core subclass; confirm.
                        if (self.up_segments.delete(sid) ==
                                DBResult.ENTRY_DELETED):
                            up_segs_removed += 1
            logging.info("Removed segments containing IF %d: "
                         "UP: %d DOWN: %d CORE: %d" %
                         (if_id, up_segs_removed, down_segs_removed,
                          core_segs_removed))

    @abstractmethod
    def _forward_revocation(self, rev_info, meta):
        """
        Forwards a revocation to other path servers that need to be
        notified.

        :param rev_info: The RevInfo object.
        :param meta: The MessageMeta object.
        """
        raise NotImplementedError

    def _send_path_segments(self, req, meta, up=None, core=None, down=None):
        """
        Sends path-segments to requester (depending on Path Server's
        location).

        :param req: the original request being answered.
        :param meta: metadata identifying the requester.
        :param up/core/down: sets of segments of each type (None -> empty).
        """
        up = up or set()
        core = core or set()
        down = down or set()
        if not (up | core | down):
            logging.warning("No segments to send")
            return
        pld = PathRecordsReply.from_values(
            {PST.UP: up, PST.CORE: core, PST.DOWN: down},
        )
        self.send_meta(pld, meta)
        logging.info("Sending PATH_REPLY with %d segment(s) to:%s "
                     "port:%s in response to: %s",
                     len(up | core | down),
                     meta.get_addr(), meta.port, req.short_desc())

    def _handle_pending_requests(self, dst_ia, sibra):
        """
        Retry pending path requests for (dst_ia, sibra); requests that can
        now be answered are served and dropped from the pending map.
        """
        to_remove = []
        key = dst_ia, sibra
        # Serve pending requests.
        for req, meta in self.pending_req[key]:
            if self.path_resolution(req, meta, new_request=False):
                meta.close()
                to_remove.append((req, meta))
        # Clean state.
        for req_meta in to_remove:
            self.pending_req[key].remove(req_meta)
        if not self.pending_req[key]:
            del self.pending_req[key]

    def handle_path_segment_record(self, seg_recs, meta):
        """
        Dispatch each PCB in a segment-record payload and then retry any
        pending requests the new segments may satisfy.
        """
        meta.close()
        # FIXME(PSz): validate before
        params = self._dispatch_params(seg_recs, meta)
        added = set()
        for type_, pcb in seg_recs.iter_pcbs():
            added.update(self._dispatch_segment_record(type_, pcb, **params))
        # Handling pending requests, basing on added segments.
        for dst_ia, sibra in added:
            self._handle_pending_requests(dst_ia, sibra)

    def _dispatch_segment_record(self, type_, seg, **kwargs):
        # Check that segment does not contain a revoked interface.
        if not self._validate_segment(seg):
            logging.debug("Not adding segment due to revoked interface:\n%s"
                          % seg.short_desc())
            return set()
        handle_map = {
            PST.UP: self._handle_up_segment_record,
            PST.CORE: self._handle_core_segment_record,
            PST.DOWN: self._handle_down_segment_record,
        }
        return handle_map[type_](seg, **kwargs)

    def _validate_segment(self, seg):
        """
        Check segment for revoked upstream/downstream interfaces.

        :param seg: The PathSegment object.
        :return: False, if the path segment contains a revoked upstream/
            downstream interface (not peer). True otherwise.
        """
        for rev_info in list(self.revocations):
            if not ConnectedHashTree.verify_epoch(rev_info.p.epoch):
                # Expired revocation: opportunistically drop it.
                self.revocations.pop(rev_info)
                continue
            for asm in seg.iter_asms():
                pcbm = asm.pcbm(0)
                if (rev_info.isd_as() == asm.isd_as() and
                        rev_info.p.ifID in [pcbm.p.inIF, pcbm.p.outIF]):
                    logging.debug("Found revoked interface (%d) in segment "
                                  "%s." % (rev_info.p.ifID, seg.short_desc()))
                    return False
        return True

    def _dispatch_params(self, pld, meta):
        # Subclass hook: extra kwargs for segment-record dispatch.
        return {}

    def _propagate_and_sync(self):
        # Flush queued segments and revocations to ZK.
        self._share_via_zk()
        self._share_revs_via_zk()

    def _gen_prop_recs(self, queue, limit=PROP_LIMIT):
        """
        Drain `queue` of (type, pcb) pairs, yielding dicts of copied PCBs
        grouped by type, at most `limit` PCBs per yielded dict.
        """
        count = 0
        pcbs = defaultdict(list)
        while queue:
            count += 1
            type_, pcb = queue.popleft()
            pcbs[type_].append(pcb.copy())
            if count >= limit:
                yield (pcbs)
                count = 0
                pcbs = defaultdict(list)
        if pcbs:
            yield (pcbs)

    @abstractmethod
    def path_resolution(self, path_request, meta, new_request):
        """
        Handles all types of path request.
        """
        raise NotImplementedError

    def _add_peer_revs(self, segments):
        """
        Adds revocations to revoked peering interfaces in segments.

        :returns: Set with modified segments. Elements of the original set
            stay untouched.
        """
        # TODO(shitz): This could be optimized, by keeping a map of
        # (ISD_AS, IF) -> RevocationInfo for peer revocations.
        modified_segs = set()
        current_epoch = ConnectedHashTree.get_current_epoch()
        for segment in segments:
            seg_id = segment.get_hops_hash()
            with self.pcb_cache_lock:
                if seg_id in self.pcb_cache:
                    # Already annotated: reuse the cached copy.
                    cached_seg = self.pcb_cache[seg_id]
                    logging.debug("Adding segment from PCB cache to response:"
                                  " %s" % cached_seg.short_desc())
                    modified_segs.add(cached_seg)
                    continue
                revs_to_add = set()
                for rev_info in list(self.revocations):
                    # NOTE(review): epoch check here uses `<` against the
                    # current epoch, while other call sites use
                    # ConnectedHashTree.verify_epoch — confirm equivalence.
                    if rev_info.p.epoch < current_epoch:
                        self.revocations.pop(rev_info)
                        continue
                    for asm in segment.iter_asms():
                        if asm.isd_as() != rev_info.isd_as():
                            continue
                        # Peer markings start at index 1 (0 is the hop PCBM).
                        for pcbm in asm.iter_pcbms(1):
                            hof = pcbm.hof()
                            if rev_info.p.ifID in [hof.ingress_if,
                                                   hof.egress_if]:
                                revs_to_add.add(rev_info.copy())
                if revs_to_add:
                    # Annotate a copy, never the caller's segment.
                    new_seg = segment.copy()
                    new_seg.add_rev_infos(list(revs_to_add))
                    logging.debug("Adding revocations to PCB: %s"
                                  % new_seg.short_desc())
                    self.pcb_cache[seg_id] = new_seg
                    modified_segs.add(new_seg)
                else:
                    modified_segs.add(segment)
        return modified_segs

    def _handle_waiting_targets(self, pcb):
        """
        Handle any queries that are waiting for a path to any core AS in an
        ISD.

        :param pcb: a segment whose first AS must be a core AS.
        """
        dst_ia = pcb.first_ia()
        if not self.is_core_as(dst_ia):
            logging.warning("Invalid waiting target, not a core AS: %s",
                            dst_ia)
            return
        self._send_waiting_queries(dst_ia[0], pcb)

    def _send_waiting_queries(self, dst_isd, pcb):
        """
        Forward all queued segment requests for `dst_isd` along the reversed
        path of `pcb`.
        """
        targets = self.waiting_targets[dst_isd]
        if not targets:
            return
        path = pcb.get_path(reverse_direction=True)
        src_ia = pcb.first_ia()
        while targets:
            seg_req = targets.pop(0)
            meta = self.DefaultMeta.from_values(ia=src_ia, path=path,
                                                host=SVCType.PS_A)
            self.send_meta(seg_req, meta)
            logging.info("Waiting request (%s) sent via %s",
                         seg_req.short_desc(), pcb.short_desc())

    def _share_via_zk(self):
        # Publish queued segments to the shared ZK path cache in batches.
        if not self._segs_to_zk:
            return
        logging.info("Sharing %d segment(s) via ZK", len(self._segs_to_zk))
        for pcb_dict in self._gen_prop_recs(self._segs_to_zk,
                                            limit=self.ZK_SHARE_LIMIT):
            seg_recs = PathSegmentRecords.from_values(pcb_dict)
            self._zk_write(seg_recs.pack())

    def _share_revs_via_zk(self):
        # Publish queued revocations to the shared ZK revocation cache.
        if not self._revs_to_zk:
            return
        logging.info("Sharing %d revocation(s) via ZK",
                     len(self._revs_to_zk))
        while self._revs_to_zk:
            self._zk_write_rev(self._revs_to_zk.popleft())

    def _zk_write(self, data):
        # Store under a content-hash + timestamp name; best-effort on
        # ZK connection loss.
        hash_ = SHA256.new(data).hexdigest()
        try:
            self.path_cache.store("%s-%s" % (hash_, SCIONTime.get_time()),
                                  data)
        except ZkNoConnection:
            logging.warning("Unable to store segment(s) in shared path: "
                            "no connection to ZK")

    def _zk_write_rev(self, data):
        # Same naming/error policy as _zk_write, but for revocations.
        hash_ = SHA256.new(data).hexdigest()
        try:
            self.rev_cache.store("%s-%s" % (hash_, SCIONTime.get_time()),
                                 data)
        except ZkNoConnection:
            logging.warning("Unable to store revocation(s) in shared path: "
                            "no connection to ZK")

    def run(self):
        """
        Run an instance of the Path Server.
        """
        threading.Thread(
            target=thread_safety_net, args=(self.worker,),
            name="PS.worker", daemon=True).start()
        super().run()