Example #1
class GroupingAPI(WebAPI):
    def __init__(self):
        self.logger = Logger("grouping")
        self.config = {}
        try:
            with open(CONFIG_PATH, 'rt') as config_fp:
                self.config = json.load(config_fp)
        except Exception as e:
            self.logger.writeInfo("Did not read config file: {}".format(e))

        super(GroupingAPI, self).__init__(oauth_config=self.config.get('oauth'))

        self.api_v2_0 = v2_0.Routes(self.logger, self.config.get('oauth'),
                                    self.app)

        self.add_routes(self.api_v2_0,
                        basepath="/{}/{}/v2.0".format(APINAMESPACE, APINAME))

        self.app.root_path = get_root_path("ipsgrouping")

    @secure_route('/')
    def root(self):
        return [APINAMESPACE + "/"]

    @secure_route('/' + APINAMESPACE + '/')
    def namespaceroot(self):
        return [APINAME + "/"]

    @secure_route('/' + APINAMESPACE + '/' + APINAME + "/")
    def nameroot(self):
        return [v + "/" for v in APIVERSIONS]

    @secure_route(APIBASE + 'selfcheck/', methods=['GET'])
    def selfcheck(self):
        """
        Return diagnostics as JSON, and an overall code:
            200 if "all ok".
            400 otherwise.
        """
        # Note: inside this method 'selfcheck' resolves to the module-level name, not this method
        result = selfcheck.check(self._sfq)
        code = 200
        if not result.get('passed'):
            code = 400
        return (code, result)
Example #2
class GarbageCollect(object):

    parent_tab = {
        'devices': [('nodes', 'node_id')],
        'senders': [('devices', 'device_id')],
        'receivers': [('devices', 'device_id')],
        'sources': [('devices', 'device_id')],
        'flows': [('devices', 'device_id'), ('sources', 'source_id')]
    }

    def __init__(self, registry, identifier, logger=None, interval=INTERVAL):
        """
        interval
            Number of seconds between checks / collections. An interval of '0'
            means 'never check'.
        """
        self.registry = registry
        self.logger = Logger("garbage_collect", logger)
        self.identifier = identifier
        if interval > 0:
            gevent.spawn_later(interval, self.garbage_collect)

    def garbage_collect(self):
        # Check that garbage collection hasn't been run recently (by another collector).
        # Uses etcd's atomic compare-and-swap (prevExist=false).
        # See https://github.com/coreos/etcd/blob/master/Documentation/api.md#atomic-compare-and-swap
        try:
            flag = self.registry.put_garbage_collection_flag(
                host=self.identifier, ttl=LOCK_TIMEOUT)
            if flag.status_code != 201:
                self.logger.writeDebug(
                    "Not collecting - another collector has recently collected"
                )
                return

            # Kick off a collection with a specified timeout.
            try:
                with gevent.Timeout(TIMEOUT, TooLong):
                    self._collect()

            finally:
                self.logger.writeDebug("remove flag")
                self._remove_flag()

        except Exception as e:
            self.logger.writeError(
                "Could not write garbage collect flag: {}".format(e))

        finally:
            # Always schedule another
            gevent.spawn_later(INTERVAL, self.garbage_collect)
            self.logger.writeDebug("scheduled...")

    def _collect(self):
        try:
            self.logger.writeDebug("Collecting: {}".format(self.identifier))

            # create list of nodes still alive
            alive_nodes = []

            health_dict = self.registry.get_healths()
            for h in health_dict.get('/health', {}).keys():
                node_name = h.split('/')[-1]
                alive_nodes.append(node_name)

            # TODO: GETs... maybe getting the whole response in one go is better?
            # Maybe doing these async is a good idea? For now, this suffices.
            all_types = [
                "nodes", "devices", "senders", "receivers", "sources", "flows"
            ]
            resources = {
                rtype: self.registry.get_all(rtype)
                for rtype in all_types
            }

            # Get a flat list of (type, resource) pairs for existing resources
            # TODO: combine with above
            all_resources = []
            for res_type, res in resources.items():
                all_resources += [(res_type, x) for x in res]

            # Initialise the removal queue with any dead nodes
            nodes = [x.strip('/') for x in self.registry.getresources("nodes")]

            # TODO: already have this above...
            kill_q = [('nodes', node_id) for node_id in nodes
                      if node_id not in alive_nodes]

            # Create a list of (type, id) pairs of resources that should be removed.
            to_kill = []

            # Find orphaned resources
            kill_q += self.__find_dead_resources(all_resources, to_kill)

            # Process the removal queue.
            while kill_q:
                gevent.sleep(0.0)

                # Add these resources to the list of removals
                to_kill += kill_q

                # Reduce search space; this resource can never parent another
                # This proves to be faster in the long run.
                all_resources = [
                    x for x in all_resources
                    if (x[0], x[1]['id']) not in to_kill
                ]

                # Look through remaining resources and get a new kill_q
                kill_q = self.__find_dead_resources(all_resources, to_kill)

            for resource_type, resource_id in to_kill:
                self.logger.writeInfo("removing resource: {}/{}".format(
                    resource_type, resource_id))
                self.registry.delete(resource_type, resource_id)

        except self.registry.RegistryUnavailable:
            self.logger.writeWarning("registry unavailable")

        except TooLong:
            self.logger.writeWarning("took too long")

        except Exception as e:
            self.logger.writeError("unhandled exception: {}".format(e))

    def __find_dead_resources(self, all_resources, to_kill):
        def is_alive(parent_def):
            if parent_def in to_kill:
                return False
            parent_type, parent_id = parent_def
            found_parent = next(
                (x for x in all_resources
                 if x[0] == parent_type and x[1]['id'] == parent_id), None)
            return found_parent is not None

        # Build a list of resources to remove
        kill_q = []

        # Look through all remaining resources
        for child_type, child in all_resources:

            # We need never consider nodes; they should have already been marked.
            if child_type == "nodes":
                continue

            child_id = child['id']

            # Get the parent for this child. There is only ever one effective parent;
            # types with multiple entries in the parent table list them for backward
            # compatibility, in order strongest->weakest.
            parents = [(parent_type, child.get(parent_key))
                       for parent_type, parent_key in self.parent_tab.get(
                           child_type, [])]
            parent = next((x for x in parents if x[1] is not None), None)
            if parent is None or not is_alive(parent):
                kill_q.append((child_type, child_id))

        return kill_q

    def _remove_flag(self):
        try:
            self.registry.delete_raw("garbage_collection")
        except Exception as e:
            self.logger.writeWarning("Could not remove flag: {}".format(e))
Example #3
class NodeFacadeService:
    def __init__(self, interactive=False):
        self.logger = Logger("facade", None)
        if HOST == "":
            self.logger.writeFatal(
                "Unable to start facade due to lack of connectivity")
            sys.exit(1)
        self.running = False
        self.httpServer = None
        self.interface = None
        self.interactive = interactive
        self.registry = None
        self.registry_cleaner = None
        self.node_id = None
        self.mdns = MDNSEngine()
        self.mdnsname_suffix = '_' + str(HOSTNAME) + "_" + str(getpid())
        self.mappings = {
            "device": "ver_dvc",
            "flow": "ver_flw",
            "source": "ver_src",
            "sender": "ver_snd",
            "receiver": "ver_rcv",
            "self": "ver_slf"
        }
        self.mdns_updater = MDNSUpdater(self.mdns,
                                        "_nmos-node._tcp",
                                        "node" + self.mdnsname_suffix,
                                        self.mappings,
                                        PORT,
                                        self.logger,
                                        txt_recs={
                                            "api_ver": "v1.0,v1.1,v1.2",
                                            "api_proto": "http"
                                        })
        self.aggregator = Aggregator(self.logger, self.mdns_updater)

    def sig_handler(self):
        print('Pressed ctrl+c')
        self.stop()

    def sig_hup_handler(self):
        if getLocalIP() != "":
            global HOST
            HOST = updateHost()
            self.registry.modify_node(href=self.generate_href(),
                                      host=HOST,
                                      api={
                                          "versions": NODE_APIVERSIONS,
                                          "endpoints":
                                          self.generate_endpoints()
                                      },
                                      interfaces=self.list_interfaces())

    def generate_endpoints(self):
        endpoints = []
        if HTTPS_MODE != "enabled":
            endpoints.append({
                "host": HOST,
                "port": 80,  #Everything should go via apache proxy
                "protocol": "http"
            })
        if HTTPS_MODE != "disabled":
            endpoints.append({
                "host": HOST,
                "port": 443,  #Everything should go via apache proxy
                "protocol": "https"
            })
        return endpoints

    def generate_href(self):
        if HTTPS_MODE == "enabled":
            return "https://{}/".format(HOST)
        else:
            return "http://{}/".format(HOST)

    def list_interfaces(self):
        interfaces = {}
        # Initially populate interfaces from known-good location
        net_path = "/sys/class/net/"
        if os.path.exists(net_path):
            for interface_name in os.listdir(net_path):
                if interface_name != "lo":
                    address_path = net_path + interface_name + "/address"
                    if os.path.exists(address_path):
                        with open(address_path, "r") as address_fp:
                            address = address_fp.readline()
                        interfaces[interface_name] = {
                            "name": interface_name,
                            "chassis_id": None,
                            "port_id": address.lower().strip("\n").replace(":", "-")
                        }

        # Attempt to source proper LLDP data for interfaces
        if os.path.exists("/usr/sbin/lldpcli"):
            try:
                chassis_data = json.loads(
                    check_output(
                        ["/usr/sbin/lldpcli", "show", "chassis", "-f",
                         "json"]))
                chassis = list(
                    chassis_data["local-chassis"]['chassis'].values())[0]
                chassis_id = chassis["id"]["value"]
                if chassis["id"]["type"] == "mac":
                    chassis_id = chassis_id.lower().replace(":", "-")
                interface_data = json.loads(
                    check_output([
                        "/usr/sbin/lldpcli", "show", "statistics", "-f", "json"
                    ]))
                if isinstance(interface_data["lldp"]["interface"], dict):
                    for interface_name in interface_data["lldp"]["interface"]:
                        if interface_name in interfaces:
                            # Only correct the Chassis ID. Port ID MUST be a MAC address
                            interfaces[interface_name]["chassis_id"] = chassis_id
                else:
                    for interface_block in interface_data["lldp"]["interface"]:
                        interface_name = list(interface_block.keys())[0]
                        if interface_name in interfaces:
                            # Only correct the Chassis ID. Port ID MUST be a MAC address
                            interfaces[interface_name]["chassis_id"] = chassis_id
            except Exception:
                # LLDP data is best-effort; fall back to the /sys values on any failure
                pass

        return list(interfaces.values())

    def start(self):
        if self.running:  # signal handlers are only installed when started via run()
            gevent.signal(signal.SIGINT, self.sig_handler)
            gevent.signal(signal.SIGTERM, self.sig_handler)
            gevent.signal(signal.SIGHUP, self.sig_hup_handler)

        self.mdns.start()
        self.node_id = get_node_id()
        ptp_now = ptptime.ptp_detail()
        node_version = "{}:{}".format(ptp_now[0], ptp_now[1])
        node_data = {
            "id": self.node_id,
            "label": nmoscommonconfig.config.get('node_label', FQDN),
            "description": nmoscommonconfig.config.get(
                'node_description', "Node on {}".format(FQDN)),
            "tags": nmoscommonconfig.config.get('node_tags', {}),
            "href": self.generate_href(),
            "host": HOST,
            "services": [],
            "hostname": HOSTNAME,
            "caps": {},
            "version": node_version,
            "api": {
                "versions": NODE_APIVERSIONS,
                "endpoints": self.generate_endpoints(),
            },
            "clocks": [
                {
                    "name": "clk0",
                    "ref_type": "internal",
                },
                {
                    "name": "clk1",
                    "ref_type": "ptp",
                    "version": "IEEE1588-2008",
                    "traceable": False,
                    "gmid": "00-00-00-00-00-00-00-00",
                    "locked": False,
                },
                # Extra values will be filled in as needed at point of checking
            ],
            "interfaces": self.list_interfaces()
        }
        self.registry = FacadeRegistry(self.mappings.keys(), self.aggregator,
                                       self.mdns_updater, self.node_id,
                                       node_data, self.logger)
        self.registry_cleaner = FacadeRegistryCleaner(self.registry)
        self.registry_cleaner.start()
        self.httpServer = HttpServer(FacadeAPI,
                                     PORT,
                                     '0.0.0.0',
                                     api_args=[self.registry])
        self.httpServer.start()
        while not self.httpServer.started.is_set():
            self.logger.writeInfo('Waiting for httpserver to start...')
            self.httpServer.started.wait()

        if self.httpServer.failed is not None:
            raise self.httpServer.failed

        self.logger.writeInfo("Running on port: {}".format(
            self.httpServer.port))

        try:
            self.logger.writeInfo("Registering as {}...".format(self.node_id))
            self.aggregator.register(
                'node', self.node_id,
                **legalise_resource(node_data, "node", NODE_REGVERSION))
        except Exception as e:
            self.logger.writeWarning("Could not register: {}".format(
                e.__repr__()))

        self.interface = FacadeInterface(self.registry, self.logger)
        self.interface.start()

    def run(self):
        self.running = True
        pidfile = "/tmp/ips-nodefacade.pid"
        with open(pidfile, 'w') as pid_fp:
            pid_fp.write(str(getpid()))
        self.start()
        daemon.notify("READY=1")
        while self.running:
            self.registry.update_ptp()
            time.sleep(1)
        os.unlink(pidfile)
        self._cleanup()

    def _cleanup(self):
        try:
            self.logger.writeDebug("cleanup: unregister facade " +
                                   self.node_id)
            self.aggregator.unregister('node', self.node_id)
        except Exception as e:
            self.logger.writeWarning("Could not unregister: {}".format(e))

        if self.mdns:
            try:
                self.mdns.stop()
            except Exception as e:
                self.logger.writeWarning("Could not stop mdns: {}".format(e))

        self.registry_cleaner.stop()
        self.interface.stop()
        self.httpServer.stop()
        self.aggregator.stop()
        self.logger.writeInfo("Stopped main()")

    def stop(self):
        self.running = False
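
The trickiest part of list_interfaces above is that lldpcli emits two different JSON shapes for "interface": a single dict when one interface reports, and a list of one-key dicts otherwise. A minimal sketch of handling both shapes, with made-up sample documents (list() is used so the indexing also works on Python 3):

import json

def interface_names(interface_data):
    # Normalise both lldpcli output shapes to a flat list of interface names.
    block = interface_data["lldp"]["interface"]
    if isinstance(block, dict):
        return list(block.keys())
    return [list(entry.keys())[0] for entry in block]

single = json.loads('{"lldp": {"interface": {"eth0": {}}}}')
multi = json.loads('{"lldp": {"interface": [{"eth0": {}}, {"eth1": {}}]}}')
print(interface_names(single))  # ['eth0']
print(interface_names(multi))   # ['eth0', 'eth1']
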
Example #4
class GroupingService:
    def __init__(self, interactive=False):
        self.facades = {}
        for version in APIVERSIONS:
            self.facades[version] = Facade("{}/{}".format(APINAME, version))
        self.logger = Logger("grouping", None)
        self.running = False
        self.httpServer = None
        with open(CONFIG_PATH) as config_fp:
            self.config = json.load(config_fp)

    def _sig_handler(self):
        self.logger.writeInfo("Stopping...")
        self._stop()

    def _start(self):
        gevent.signal(signal.SIGINT, self._sig_handler)
        gevent.signal(signal.SIGTERM, self._sig_handler)

        self.httpServer = HttpServer(GroupingAPI,
                                     PORT,
                                     '0.0.0.0',
                                     ssl=self.config.get("ssl"))
        self.httpServer.start()
        while not self.httpServer.started.is_set():
            self.logger.writeInfo('Waiting for httpserver to start...')
            self.httpServer.started.wait()

        if self.httpServer.failed is not None:
            raise self.httpServer.failed

        self.httpServer.api.app.config.update(self.config)
        db_mongo.init_app(self.httpServer.api.app, logger=self.logger)
        self.logger.writeInfo("Running on port: {}".format(
            self.httpServer.port))

    def run(self):
        self.running = True
        self._start()
        for version in APIVERSIONS:
            self.facades[version].register_service(
                "http://127.0.0.1:" + str(PORT),
                "{}/{}/{}/".format(APINAMESPACE, APINAME, version))
        itercount = 0
        while self.running:
            if itercount % 5 == 0:  # heartbeat roughly every five seconds
                for facade in self.facades.values():
                    facade.heartbeat_service()
            gevent.sleep(1)
            itercount += 1
            if itercount == 10:
                itercount = 0
        for facade in self.facades.values():
            facade.unregister_service()
        self._cleanup()

    def _cleanup(self):
        self.httpServer.stop()
        self.logger.writeInfo("Stopped")

    def _stop(self):
        self.running = False
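
Both services above use the same start-up handshake with HttpServer: wait on a 'started' event, then re-raise any failure the server captured. A minimal sketch of that pattern with gevent (ToyServer and its timings are hypothetical stand-ins):

import gevent
from gevent.event import Event

class ToyServer(object):
    def __init__(self):
        self.started = Event()
        self.failed = None

    def start(self):
        gevent.spawn(self._run)

    def _run(self):
        try:
            gevent.sleep(0.1)  # stand-in for binding the listening socket
        except Exception as e:
            self.failed = e
        finally:
            self.started.set()  # wake the waiting caller either way

server = ToyServer()
server.start()
while not server.started.is_set():
    server.started.wait()
if server.failed is not None:
    raise server.failed
print('server started')
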
Example #5
class Aggregator(object):
    """This class serves as a proxy for the distant aggregation service running elsewhere on the network.
    It will search out aggregators and locate them, falling back to other ones if the one it is connected to
    disappears, and resending data as needed."""
    def __init__(self, logger=None, mdns_updater=None):
        self.logger = Logger("aggregator_proxy", logger)
        self.mdnsbridge = IppmDNSBridge(logger=self.logger)
        self.aggregator = ""
        self.registration_order = [
            "device", "source", "flow", "sender", "receiver"
        ]
        self._mdns_updater = mdns_updater
        # 'registered' is a local mirror of aggregated items. There are helper methods
        # for manipulating this below.
        self._registered = {
            'node': None,
            'registered': False,
            'entities': {
                'resource': {}
            }
        }
        self._running = True
        self._reg_queue = gevent.queue.Queue()
        self.heartbeat_thread = gevent.spawn(self._heartbeat)
        self.queue_thread = gevent.spawn(self._process_queue)

    # The heartbeat thread runs in the background every five seconds.
    # If, when it runs, the Node is believed to be registered, it will perform a heartbeat
    def _heartbeat(self):
        self.logger.writeDebug("Starting heartbeat thread")
        while self._running:
            heartbeat_wait = 5
            if not self._registered["registered"]:
                self._process_reregister()
            elif self._registered["node"]:
                # Do heartbeat
                try:
                    self.logger.writeDebug(
                        "Sending heartbeat for Node {}".format(
                            self._registered["node"]["data"]["id"]))
                    self._SEND(
                        "POST", "/health/nodes/" +
                        self._registered["node"]["data"]["id"])
                except InvalidRequest as e:
                    if e.status_code == 404:
                        # Re-register
                        self.logger.writeWarning(
                            "404 error on heartbeat. Marking Node for re-registration"
                        )
                        self._registered["registered"] = False

                        if (self._mdns_updater is not None):
                            self._mdns_updater.inc_P2P_enable_count()
                    else:
                        # Client side error. Report this upwards via exception, but don't resend
                        self.logger.writeError(
                            "Unrecoverable error code {} received from Registration API on heartbeat"
                            .format(e.status_code))
                        self._running = False
                except Exception:
                    # Re-register
                    self.logger.writeWarning(
                        "Unexpected error on heartbeat. Marking Node for re-registration"
                    )
                    self._registered["registered"] = False
            else:
                self._registered["registered"] = False
                if (self._mdns_updater is not None):
                    self._mdns_updater.inc_P2P_enable_count()
            while heartbeat_wait > 0 and self._running:
                gevent.sleep(1)
                heartbeat_wait -= 1
        self.logger.writeDebug("Stopping heartbeat thread")

    # Provided the Node is believed to be correctly registered, hand off a single request to the SEND method
    # On client error, clear the resource from the local mirror
    # On other error, mark Node as unregistered and trigger re-registration
    def _process_queue(self):
        self.logger.writeDebug("Starting HTTP queue processing thread")
        while self._running or (
                self._registered["registered"] and not self._reg_queue.empty()
        ):  # Checks queue not empty before quitting to make sure unregister node gets done
            if not self._registered["registered"] or self._reg_queue.empty():
                gevent.sleep(1)
            else:
                try:
                    queue_item = self._reg_queue.get()
                    namespace = queue_item["namespace"]
                    res_type = queue_item["res_type"]
                    res_key = queue_item["key"]
                    if queue_item["method"] == "POST":
                        if res_type == "node":
                            data = self._registered["node"]
                            try:
                                self.logger.writeInfo(
                                    "Attempting registration for Node {}".
                                    format(self._registered["node"]["data"]
                                           ["id"]))
                                self._SEND("POST", "/{}".format(namespace),
                                           data)
                                self._SEND(
                                    "POST", "/health/nodes/" +
                                    self._registered["node"]["data"]["id"])
                                self._registered["registered"] = True
                                if self._mdns_updater is not None:
                                    self._mdns_updater.P2P_disable()

                            except Exception:
                                self.logger.writeWarning(
                                    "Error registering Node: {}".format(
                                        traceback.format_exc()))

                        elif res_key in self._registered["entities"][
                                namespace][res_type]:
                            data = self._registered["entities"][namespace][
                                res_type][res_key]
                            try:
                                self._SEND("POST", "/{}".format(namespace),
                                           data)
                            except InvalidRequest as e:
                                self.logger.writeWarning(
                                    "Error registering {} {}: {}".format(
                                        res_type, res_key, e))
                                self.logger.writeWarning(
                                    "Request data: {}".format(
                                        self._registered["entities"][namespace]
                                        [res_type][res_key]))
                                del self._registered["entities"][namespace][
                                    res_type][res_key]

                    elif queue_item["method"] == "DELETE":
                        translated_type = res_type + 's'
                        try:
                            self._SEND(
                                "DELETE",
                                "/{}/{}/{}".format(namespace, translated_type,
                                                   res_key))
                        except InvalidRequest as e:
                            self.logger.writeWarning(
                                "Error deleting resource {} {}: {}".format(
                                    translated_type, res_key, e))
                    else:
                        self.logger.writeWarning(
                            "Method {} not supported for Registration API interactions"
                            .format(queue_item["method"]))
                except Exception as e:
                    self.logger.writeWarning(
                        "Unexpected error processing queue: {}".format(e))
                    self._registered["registered"] = False
                    if self._mdns_updater is not None:
                        self._mdns_updater.P2P_disable()
        self.logger.writeDebug("Stopping HTTP queue processing thread")

    # Queue a request to be processed. Handles all requests except initial Node POST which is done in _process_reregister
    def _queue_request(self, method, namespace, res_type, key):
        self._reg_queue.put({
            "method": method,
            "namespace": namespace,
            "res_type": res_type,
            "key": key
        })

    # Register 'resource' type data including the Node
    # NB: Node registration is managed by heartbeat thread so may take up to 5 seconds!
    def register(self, res_type, key, **kwargs):
        self.register_into("resource", res_type, key, **kwargs)

    # Unregister 'resource' type data including the Node
    def unregister(self, res_type, key):
        self.unregister_from("resource", res_type, key)

    # General register method for 'resource' types
    def register_into(self, namespace, res_type, key, **kwargs):
        data = kwargs
        send_obj = {"type": res_type, "data": data}
        if 'id' not in send_obj["data"]:
            self.logger.writeWarning(
                "No 'id' present in data, using key='{}': {}".format(
                    key, data))
            send_obj["data"]["id"] = key

        if namespace == "resource" and res_type == "node":
            # Handle special Node type
            self._registered["node"] = send_obj
        else:
            self._add_mirror_keys(namespace, res_type)
            self._registered["entities"][namespace][res_type][key] = send_obj
        self._queue_request("POST", namespace, res_type, key)

    # General unregister method for 'resource' types
    def unregister_from(self, namespace, res_type, key):
        if namespace == "resource" and res_type == "node":
            # Handle special Node type
            self._registered["node"] = None
        elif res_type in self._registered["entities"][namespace]:
            self._add_mirror_keys(namespace, res_type)
            if key in self._registered["entities"][namespace][res_type]:
                del self._registered["entities"][namespace][res_type][key]
        self._queue_request("DELETE", namespace, res_type, key)

    # Deal with missing keys in local mirror
    def _add_mirror_keys(self, namespace, res_type):
        if namespace not in self._registered["entities"]:
            self._registered["entities"][namespace] = {}
        if res_type not in self._registered["entities"][namespace]:
            self._registered["entities"][namespace][res_type] = {}

    # Re-register just the Node, and queue requests in order for other resources
    def _process_reregister(self):
        if self._registered.get("node", None) is None:
            self.logger.writeDebug("No node registered, re-register returning")
            return

        try:
            self.logger.writeDebug(
                "Clearing old Node from API prior to re-registration")
            self._SEND(
                "DELETE",
                "/resource/nodes/" + self._registered["node"]["data"]["id"])
        except InvalidRequest as e:
            # 404 etc is ok
            self.logger.writeInfo(
                "Invalid request when deleting Node prior to registration: {}".
                format(e))
        except Exception as ex:
            # Server error is bad, no point continuing
            self.logger.writeError("Aborting Node re-register! {}".format(ex))
            return

        self._registered["registered"] = False
        if (self._mdns_updater is not None):
            self._mdns_updater.inc_P2P_enable_count()

        # Drain the queue
        while not self._reg_queue.empty():
            try:
                self._reg_queue.get(block=False)
            except gevent.queue.Empty:
                break

        try:
            # Register the node, and immediately heartbeat if successful to avoid race with garbage collect.
            self.logger.writeInfo(
                "Attempting re-registration for Node {}".format(
                    self._registered["node"]["data"]["id"]))
            self._SEND("POST", "/resource", self._registered["node"])
            self._SEND(
                "POST",
                "/health/nodes/" + self._registered["node"]["data"]["id"])
            self._registered["registered"] = True
            if self._mdns_updater is not None:
                self._mdns_updater.P2P_disable()
        except Exception as e:
            self.logger.writeWarning("Error re-registering Node: {}".format(e))
            self.aggregator == ""  # Fallback to prevent us getting stuck if the Reg API issues a 4XX error incorrectly
            return

        # Re-register items that must be ordered
        # Re-register things we have in the local cache.
        # "namespace" is e.g. "resource"
        # "entities" are the things associated under that namespace.
        for res_type in self.registration_order:
            for namespace, entities in self._registered["entities"].items():
                if res_type in entities:
                    self.logger.writeInfo(
                        "Ordered re-registration for type: '{}' in namespace '{}'"
                        .format(res_type, namespace))
                    for key in entities[res_type]:
                        self._queue_request("POST", namespace, res_type, key)

        # Re-register everything else
        # Re-register things we have in the local cache.
        # "namespace" is e.g. "resource"
        # "entities" are the things associated under that namespace.
        for namespace, entities in self._registered["entities"].items():
            for res_type in entities:
                if res_type not in self.registration_order:
                    self.logger.writeInfo(
                        "Unordered re-registration for type: '{}' in namespace '{}'"
                        .format(res_type, namespace))
                    for key in entities[res_type]:
                        self._queue_request("POST", namespace, res_type, key)

    # Stop the Aggregator object running
    def stop(self):
        self.logger.writeDebug("Stopping aggregator proxy")
        self._running = False
        self.heartbeat_thread.join()
        self.queue_thread.join()

    # Handle sending all requests to the Registration API, and searching for a new 'aggregator' if one fails
    def _SEND(self, method, url, data=None):
        if self.aggregator == "":
            self.aggregator = self.mdnsbridge.getHref(REGISTRATION_MDNSTYPE)

        if data is not None:
            data = json.dumps(data)

        url = AGGREGATOR_APINAMESPACE + "/" + AGGREGATOR_APINAME + "/" + AGGREGATOR_APIVERSION + url
        for i in range(0, 3):
            if self.aggregator == "":
                self.logger.writeWarning(
                    "No aggregator available on the network or mdnsbridge unavailable"
                )
                raise NoAggregator(self._mdns_updater)

            self.logger.writeDebug("{} {}".format(
                method, urljoin(self.aggregator, url)))

            # We give a long(ish) timeout below, as the async request may succeed after the timeout period
            # has expired, causing the node to be registered twice (potentially at different aggregators).
            # Whilst this isn't a problem in practice, it may cause excessive churn in websocket traffic
            # to web clients - so, sacrifice a little timeliness for things working as designed the
            # majority of the time...
            try:
                if not nmoscommonconfig.config.get('prefer_ipv6', False):
                    R = requests.request(method,
                                         urljoin(self.aggregator, url),
                                         data=data,
                                         timeout=1.0)
                else:
                    R = requests.request(method,
                                         urljoin(self.aggregator, url),
                                         data=data,
                                         timeout=1.0,
                                         proxies={'http': ''})
                if R is None:
                    # Try another aggregator
                    self.logger.writeWarning(
                        "No response from aggregator {}".format(
                            self.aggregator))

                elif R.status_code in [200, 201]:
                    if R.headers.get(
                            "content-type",
                            "text/plain").startswith("application/json"):
                        return R.json()
                    else:
                        return R.content

                elif R.status_code == 204:
                    return

                elif R.status_code // 100 == 4:
                    self.logger.writeWarning(
                        "{} response from aggregator: {} {}".format(
                            R.status_code, method,
                            urljoin(self.aggregator, url)))
                    raise InvalidRequest(R.status_code, self._mdns_updater)

                else:
                    self.logger.writeWarning(
                        "Unexpected status from aggregator {}: {}, {}".format(
                            self.aggregator, R.status_code, R.content))

            except requests.exceptions.RequestException as ex:
                # Log a warning, then let another aggregator be chosen
                self.logger.writeWarning("{} from aggregator {}".format(
                    ex, self.aggregator))

            # This aggregator is non-functional
            self.aggregator = self.mdnsbridge.getHref(REGISTRATION_MDNSTYPE)
            self.logger.writeInfo("Updated aggregator to {} (try {})".format(
                self.aggregator, i))

        raise TooManyRetries(self._mdns_updater)
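
Note that _SEND builds a path with no leading slash and resolves it against the aggregator href with urljoin, so any path component on the href is preserved. A quick demonstration (the hosts below are made up):

try:
    from urllib.parse import urljoin   # Python 3
except ImportError:
    from urlparse import urljoin       # Python 2

path = "x-nmos/registration/v1.2" + "/health/nodes/node-id"

print(urljoin("http://reg.example.com:2235", path))
# -> http://reg.example.com:2235/x-nmos/registration/v1.2/health/nodes/node-id

# Because the path is relative, a trailing path on the href matters:
print(urljoin("http://reg.example.com:2235/api/", path))
# -> http://reg.example.com:2235/api/x-nmos/registration/v1.2/health/nodes/node-id
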
Example #6
class Aggregator(object):
    """This class serves as a proxy for the distant aggregation service running elsewhere on the network.
    It will search out aggregators and locate them, falling back to other ones if the one it is connected to
    disappears, and resending data as needed."""
    def __init__(self, logger=None, mdns_updater=None, auth_registry=None):
        self.logger = Logger("aggregator_proxy", logger)
        self.mdnsbridge = IppmDNSBridge(logger=self.logger)
        self.aggregator_apiversion = None
        self.service_type = None
        self._set_api_version_and_srv_type(
            _config.get('nodefacade').get('NODE_REGVERSION'))
        self.aggregator = None
        self.registration_order = [
            "device", "source", "flow", "sender", "receiver"
        ]
        self._mdns_updater = mdns_updater
        # '_node_data' is a local mirror of aggregated items.
        self._node_data = {
            'node': None,
            'registered': False,
            'entities': {
                'resource': {}
            }
        }
        self._running = True
        self._aggregator_list_stale = True
        self._aggregator_failure = False  # Flag set when an aggregator has returned an unexpected error
        self._backoff_active = False
        self._backoff_period = 0

        self.auth_registrar = None  # Class responsible for registering with Auth Server
        self.auth_registry = auth_registry  # Top level class that tracks locally registered OAuth clients
        self.auth_client = None  # Instance of Oauth client responsible for performing token requests

        self._reg_queue = gevent.queue.Queue()
        self.main_thread = gevent.spawn(self._main_thread)
        self.queue_thread = gevent.spawn(self._process_queue)

    def _set_api_version_and_srv_type(self, api_ver):
        """Set the aggregator api version equal to parameter and DNS-SD service type based on api version"""
        self.aggregator_apiversion = api_ver
        self._set_service_type(api_ver)

    def _set_service_type(self, api_ver):
        """Set DNS-SD service type based on current api version in use"""
        if api_ver in ['v1.0', 'v1.1', 'v1.2']:
            self.service_type = LEGACY_REG_MDNSTYPE
        else:
            self.service_type = REGISTRATION_MDNSTYPE

    def _main_thread(self):
        """The main thread runs in the background.
        If, when it runs, the Node is believed to be registered it will perform a heartbeat every 5 seconds.
        If the Node is not registered it will try to register the Node"""
        self.logger.writeDebug("Starting main thread")

        while self._running:
            if self._node_data["node"] and self.aggregator is None:
                self._discovery_operation()
            elif self._node_data["node"] and self._node_data["registered"]:
                self._registered_operation()
            else:
                self._node_data["registered"] = False
                self.aggregator = None
                gevent.sleep(0.2)

        self.logger.writeDebug("Stopping heartbeat thread")

    def _discovery_operation(self):
        """In Discovery operation the Node will wait a backoff period if defined to allow aggregators to recover when in
        a state of error. Selecting the most appropriate aggregator and try to register with it.
        If a registration fails then another aggregator will be tried."""
        self.logger.writeDebug("Entering Discovery Mode")

        # Wait backoff period
        # Do not wait backoff period if aggregator failed, a new aggregator should be tried immediately
        if not self._aggregator_failure:
            self._back_off_timer()

        self._aggregator_failure = False

        # Update cached list of aggregators
        if self._aggregator_list_stale:
            self._flush_cached_aggregators()

        while True:
            self.aggregator = self._get_aggregator()
            if self.aggregator is None:
                self.logger.writeDebug("Failed to find registration API")
                break
            self.logger.writeDebug("Aggregator set to: {}".format(
                self.aggregator))

            # Perform initial heartbeat, which will attempt to register the Node if not already registered
            if self._heartbeat():
                # Successfully registered the Node with this aggregator; proceed to registered operation.
                # Otherwise fall through and try the next aggregator.
                break

    def _registered_operation(self):
        """In Registered operation, the Node is registered so a heartbeat will be performed,
        if the heartbeat is successful the Node will wait 5 seconds before attempting another heartbeat.
        Else another aggregator will be selected"""
        if not self._heartbeat():
            # Heartbeat failed
            # Flag to update cached list of aggregators and immediately try new aggregator
            self.aggregator = None
            self._aggregator_failure = True

    def _heartbeat(self):
        """Performs a heartbeat to registered aggregator
        If heartbeat fails it will take actions to correct the error, by re-registering the Node
        If successfull will return True, else will return False"""
        if not self.aggregator:
            return False
        try:
            R = self._send(
                "POST", self.aggregator, self.aggregator_apiversion,
                "health/nodes/{}".format(
                    self._node_data["node"]["data"]["id"]))

            if R.status_code == 200 and self._node_data["registered"]:
                # Continue to registered operation
                self.logger.writeDebug(
                    "Successful heartbeat for Node {}".format(
                        self._node_data["node"]["data"]["id"]))
                self._registered()
                heartbeat_wait = 5
                while heartbeat_wait > 0 and self._running:
                    gevent.sleep(1)
                    heartbeat_wait -= 1
                return True

            elif R.status_code in [200, 409]:
                # Delete node from registry
                if self._unregister_node(R.headers.get('Location')):
                    return self._register_node(self._node_data["node"])
                else:
                    # Try next registry
                    return False

        except InvalidRequest as e:
            if e.status_code == 404:
                # Re-register
                self.logger.writeWarning(
                    "404 error on heartbeat. Marking Node for re-registration")
                self._node_data["registered"] = False
                return self._register_node(self._node_data["node"])
            else:
                # Other error, try next registry
                return False
        except ServerSideError:
            self.logger.writeWarning(
                "Server Side Error on heartbeat. Trying another registry")
            return False
        except Exception as e:
            # Re-register
            self.logger.writeWarning(
                "Unexpected error on heartbeat: {}. Marking Node for re-registration"
                .format(e))
            self._node_data["registered"] = False
            return False

    def _register_auth(self, client_name, client_uri):
        """Register OAuth client with Authorization Server"""
        self.logger.writeInfo(
            "Attempting to register dynamically with Auth Server")
        auth_registrar = AuthRegistrar(client_name=client_name,
                                       redirect_uri=PROTOCOL + '://' + FQDN +
                                       NODE_APIROOT + 'authorize',
                                       client_uri=client_uri,
                                       allowed_scope=ALLOWED_SCOPE,
                                       allowed_grant=ALLOWED_GRANTS)
        if auth_registrar.registered is True:
            return auth_registrar
        else:
            self.logger.writeWarning(
                "Unable to successfully register with Authorization Server")

    def _register_node(self, node_obj):
        """Attempt to register Node with aggregator
        Returns True is node was successfully registered with aggregator
        Returns False if registration failed
        If registration failed with 200 or 409, will attempt to delete and re-register"""
        if node_obj is None:
            return False

        # Drain the queue
        while not self._reg_queue.empty():
            try:
                self._reg_queue.get(block=False)
            except gevent.queue.Empty:
                break

        try:
            # Try to register the Node up to 3 times with this aggregator before falling back to the next one
            for i in range(0, 3):
                R = self._send("POST", self.aggregator,
                               self.aggregator_apiversion, "resource",
                               node_obj)

                if R.status_code == 201:
                    # Continue to registered operation
                    self.logger.writeInfo(
                        "Node Registered with {} at version {}".format(
                            self.aggregator, self.aggregator_apiversion))
                    self._registered()

                    # Trigger registration of Nodes resources
                    self._register_node_resources()

                    return True

                elif R.status_code in [200, 409]:
                    # Delete node from aggregator & re-register
                    if self._unregister_node(R.headers.get('Location')):
                        continue
                    else:
                        # Try next aggregator
                        return False
        except Exception as e:
            self.logger.writeError("Failed to register node: {}".format(e))
        return False

    def _register_node_resources(self):
        # Re-register items that must be ordered
        # Re-register things we have in the local cache.
        # "namespace" is e.g. "resource"
        # "entities" are the things associated under that namespace.
        for res_type in self.registration_order:
            for namespace, entities in self._node_data["entities"].items():
                if res_type in entities:
                    self.logger.writeInfo(
                        "Ordered re-registration for type: '{}' in namespace '{}'"
                        .format(res_type, namespace))
                    for key in entities[res_type]:
                        self._queue_request("POST", namespace, res_type, key)

        # Re-register everything else
        # Re-register things we have in the local cache.
        # "namespace" is e.g. "resource"
        # "entities" are the things associated under that namespace.
        for namespace, entities in self._node_data["entities"].items():
            for res_type in entities:
                if res_type not in self.registration_order:
                    self.logger.writeInfo(
                        "Unordered re-registration for type: '{}' in namespace '{}'"
                        .format(res_type, namespace))
                    for key in entities[res_type]:
                        self._queue_request("POST", namespace, res_type, key)

    def _registered(self):
        """Mark Node as registered and reset counters"""
        if (self._mdns_updater is not None):
            self._mdns_updater.P2P_disable()

        self._node_data['registered'] = True
        self._aggregator_list_stale = True

        self._reset_backoff_period()

    def _reset_backoff_period(self):
        self.logger.writeDebug("Resetting backoff period")
        self._backoff_period = 0

    def _increase_backoff_period(self):
        """Exponentially increase the backoff period, until set maximum reached"""
        self.logger.writeDebug("Increasing backoff period")
        self._aggregator_list_stale = True

        if self._backoff_period == 0:
            self._backoff_period = BACKOFF_INITIAL_TIMOUT_SECONDS
            return

        self._backoff_period *= 2
        if self._backoff_period > BACKOFF_MAX_TIMEOUT_SECONDS:
            self._backoff_period = BACKOFF_MAX_TIMEOUT_SECONDS

    def _back_off_timer(self):
        """Sleep for defined backoff period"""
        self.logger.writeDebug("Backoff timer enabled for {} seconds".format(
            self._backoff_period))
        self._backoff_active = True
        gevent.sleep(self._backoff_period)
        self._backoff_active = False

    def _flush_cached_aggregators(self):
        """Flush the list of cached aggregators in the mdns bridge client,
        preventing the use of out of date aggregators"""
        self.logger.writeDebug("Flushing cached list of aggregators")
        self._aggregator_list_stale = False
        self.mdnsbridge.updateServices(self.service_type)

    def _get_aggregator(self):
        """Get the most appropriate aggregator from the mdns bridge client.
        If no aggregator found increment P2P counter, update cache and increase backoff
        If reached the end of available aggregators update cache and increase backoff"""

        try:
            return self.mdnsbridge.getHrefWithException(
                self.service_type, None, self.aggregator_apiversion, PROTOCOL,
                OAUTH_MODE)
        except NoService:
            self.logger.writeDebug(
                "No Registration services found: {} {} {}".format(
                    self.service_type, self.aggregator_apiversion, PROTOCOL))
            if self._mdns_updater is not None:
                self._mdns_updater.inc_P2P_enable_count()
            self._increase_backoff_period()
            return None
        except EndOfServiceList:
            self.logger.writeDebug(
                "End of Registration services list: {} {} {}".format(
                    self.service_type, self.aggregator_apiversion, PROTOCOL))
            self._increase_backoff_period()
            return None

    def _unregister_node(self, url_path=None):
        """Delete node from registry, using url_path if specified"""
        if self.aggregator is None:
            self.logger.writeWarning(
                'Could not un-register as no aggregator set')
            return False
        try:
            self._node_data['registered'] = False
            if url_path is None:
                R = self._send(
                    'DELETE', self.aggregator, self.aggregator_apiversion,
                    'resource/nodes/{}'.format(
                        self._node_data['node']["data"]["id"]))
            else:
                parsed_url = urlparse(url_path)
                R = self._send_request('DELETE', self.aggregator,
                                       parsed_url.path)

            if R.status_code == 204:
                # Successfully deleted node from Registry
                self.logger.writeInfo(
                    "Node unregistered from {} at version {}".format(
                        self.aggregator, self.aggregator_apiversion))
                return True
            else:
                return False
        except Exception as e:
            self.logger.writeDebug(
                'Exception raised while un-registering {}'.format(e))
            return False

    def _process_queue(self):
        """Provided the Node is believed to be correctly registered, hand off a single request to the SEND method
           On client error, clear the resource from the local mirror
           On other error, mark Node as unregistered and trigger re-registration"""
        self.logger.writeDebug("Starting HTTP queue processing thread")
        while self._running:
            if (not self._node_data["registered"] or self._reg_queue.empty()
                    or self._backoff_active or not self.aggregator):
                gevent.sleep(1)
            else:
                try:
                    queue_item = self._reg_queue.get()
                    namespace = queue_item["namespace"]
                    res_type = queue_item["res_type"]
                    res_key = queue_item["key"]
                    if queue_item["method"] == "POST":
                        if res_type == "node":
                            send_obj = self._node_data.get("node")
                        else:
                            send_obj = self._node_data["entities"][namespace][
                                res_type].get(res_key)

                        if send_obj is None:
                            self.logger.writeError(
                                "No data to send for resource {}".format(
                                    res_type))
                            continue
                        try:
                            self._send("POST", self.aggregator,
                                       self.aggregator_apiversion,
                                       "{}".format(namespace), send_obj)
                            self.logger.writeInfo("Registered {} {} {}".format(
                                namespace, res_type, res_key))
                        except InvalidRequest as e:
                            self.logger.writeWarning(
                                "Error registering {} {}: {}".format(
                                    res_type, res_key, e))
                            self.logger.writeWarning(
                                "Request data: {}".format(send_obj))
                            del self._node_data["entities"][namespace][
                                res_type][res_key]

                    elif queue_item["method"] == "DELETE":
                        translated_type = res_type + 's'
                        if namespace == "resource" and res_type == "node":
                            # Handle special Node type
                            self._node_data["node"] = None
                            self._node_data["registered"] = False
                        try:
                            self._send(
                                "DELETE", self.aggregator,
                                self.aggregator_apiversion,
                                "{}/{}/{}".format(namespace, translated_type,
                                                  res_key))
                            self.logger.writeInfo(
                                "Un-registered {} {} {}".format(
                                    namespace, translated_type, res_key))
                        except InvalidRequest as e:
                            self.logger.writeWarning(
                                "Error deleting resource {} {}: {}".format(
                                    translated_type, res_key, e))
                    else:
                        self.logger.writeWarning(
                            "Method {} not supported for Registration API interactions"
                            .format(queue_item["method"]))
                except ServerSideError:
                    self.aggregator = None
                    self._aggregator_failure = True
                    self._add_request_to_front_of_queue(queue_item)
                except Exception as e:
                    self.logger.writeError(
                        "Unexpected Error while processing queue, marking Node for re-registration\n"
                        "{}".format(e))
                    self._node_data["registered"] = False
                    self.aggregator = None
                    if (self._mdns_updater is not None):
                        self._mdns_updater.P2P_disable()
        self.logger.writeDebug("Stopping HTTP queue processing thread")

    def _queue_request(self, method, namespace, res_type, key):
        """Queue a request to be processed.
           Handles all requests except initial Node POST which is done in _process_reregister"""
        self._reg_queue.put({
            "method": method,
            "namespace": namespace,
            "res_type": res_type,
            "key": key
        })

    def _add_request_to_front_of_queue(self, request):
        """Adds item to the front of the queue"""

        new_queue = deque()
        new_queue.append(request)

        # Drain the queue
        while not self._reg_queue.empty():
            try:
                new_queue.append(self._reg_queue.get_nowait())
            except gevent.queue.Empty:
                break

        # Add items back to the queue
        while True:
            try:
                self._reg_queue.put(new_queue.popleft())
            except IndexError:
                break

    def register_auth_client(self, client_name, client_uri):
        """Function for Registering OAuth client with Auth Server and instantiating OAuth Client class"""

        if OAUTH_MODE is True:
            if self.auth_registrar is None:
                self.auth_registrar = self._register_auth(
                    client_name=client_name, client_uri=client_uri)
            if self.auth_registrar and self.auth_client is None:
                try:
                    # Register Node Client
                    self.auth_registry.register_client(
                        client_name=client_name,
                        client_uri=client_uri,
                        **self.auth_registrar.server_metadata)
                    self.logger.writeInfo(
                        "Successfully registered Auth Client")
                except (OSError, IOError):
                    self.logger.writeError(
                        "Exception accessing OAuth credentials. This may be a file permissions issue."
                    )
                    return
                # Extract the 'RemoteApp' class created when registering
                self.auth_client = getattr(self.auth_registry, client_name)
                # Fetch Token
                self.get_auth_token()

    def get_auth_token(self):
        """Fetch Access Token either using redirection grant flow or using auth_client"""
        if self.auth_client is not None and self.auth_registrar is not None:
            try:
                if "authorization_code" in self.auth_registrar.client_metadata.get(
                        "grant_types", {}):
                    self.logger.writeInfo(
                        "Endpoint '/oauth' on Node API will provide redirect to authorization endpoint on Auth Server."
                    )
                    return
                elif "client_credentials" in self.auth_registrar.client_metadata.get(
                        "grant_types", {}):
                    # Fetch Token
                    token = self.auth_client.fetch_access_token()
                    # Store token in member variable to be extracted using `fetch_local_token` function
                    self.auth_registry.bearer_token = token
                else:
                    raise OAuth2Error(
                        "Client not registered with supported Grant Type")
            except OAuth2Error as e:
                self.logger.writeError(
                    "Failure fetching access token. {}".format(e))

    def register(self, res_type, key, **kwargs):
        """Register 'resource' type data including the Node
           NB: Node registration is managed by heartbeat thread so may take up to 5 seconds! """
        self.register_into("resource", res_type, key, **kwargs)

    def unregister(self, res_type, key):
        """Unregister 'resource' type data including the Node"""
        self.unregister_from("resource", res_type, key)

    def register_into(self, namespace, res_type, key, **kwargs):
        """General register method for 'resource' types"""
        data = kwargs
        send_obj = {"type": res_type, "data": data}
        if 'id' not in send_obj["data"]:
            self.logger.writeWarning(
                "No 'id' present in data, using key='{}': {}".format(
                    key, data))
            send_obj["data"]["id"] = key

        if namespace == "resource" and res_type == "node":
            # Ensure we are registered with the Auth Server (TODO: is there a better place for this?)
            if OAUTH_MODE is True:
                self.register_auth_client("nmos-node-{}".format(data["id"]),
                                          FQDN)
            # Handle special Node type when Node is not registered, by immediately registering
            if self._node_data["node"] is None:
                # Will trigger registration in main thread
                self._node_data["node"] = send_obj
                return
            # Update Node Data
            self._node_data["node"] = send_obj
        else:
            self._add_mirror_keys(namespace, res_type)
            self._node_data["entities"][namespace][res_type][key] = send_obj
        self._queue_request("POST", namespace, res_type, key)

    def unregister_from(self, namespace, res_type, key):
        """General unregister method for 'resource' types"""
        if namespace == "resource" and res_type == "node":
            # Handle special Node type
            self._unregister_node()
            self._node_data["node"] = None
            return
        elif res_type in self._node_data["entities"][namespace]:
            self._add_mirror_keys(namespace, res_type)
            if key in self._node_data["entities"][namespace][res_type]:
                del self._node_data["entities"][namespace][res_type][key]
        self._queue_request("DELETE", namespace, res_type, key)

    def _add_mirror_keys(self, namespace, res_type):
        """Deal with missing keys in local mirror"""
        if namespace not in self._node_data["entities"]:
            self._node_data["entities"][namespace] = {}
        if res_type not in self._node_data["entities"][namespace]:
            self._node_data["entities"][namespace][res_type] = {}

    def stop(self):
        """Stop the Aggregator object running"""
        self.logger.writeDebug("Stopping aggregator proxy")
        self._running = False
        self.main_thread.join()
        self.queue_thread.join()

    def status(self):
        """Return the current status of node in the aggregator"""
        return {
            "api_href": self.aggregator,
            "api_version": self.aggregator_apiversion,
            "registered": self._node_data["registered"]
        }

    def _send(self, method, aggregator, api_ver, url, data=None):
        """Handle sending request to the registration API, with error handling
        HTTP 200, 201, 204, 409 - Success, return response
        Timeout, HTTP 5xx, Connection Error - Raise ServerSideError Exception
        HTTP 4xx - Raise InvalidRequest Exception"""

        url = "{}/{}/{}".format(AGGREGATOR_APIROOT, api_ver, url)

        try:
            resp = self._send_request(method, aggregator, url, data)
            if resp is None:
                self.logger.writeWarning(
                    "No response from aggregator {}".format(aggregator))
                raise ServerSideError

            elif resp.status_code in [200, 201, 204, 409]:
                return resp

            elif (resp.status_code // 100) == 4:
                self.logger.writeWarning(
                    "{} response from aggregator: {} {}".format(
                        resp.status_code, method, urljoin(aggregator, url)))
                self.logger.writeDebug("\nResponse: {}".format(resp.content))
                raise InvalidRequest(resp.status_code)

            else:
                self.logger.writeWarning(
                    "Unexpected status from aggregator {}: {}, {}".format(
                        aggregator, resp.status_code, resp.content))
                raise ServerSideError

        except requests.exceptions.RequestException as e:
            # Log a warning, then let another aggregator be chosen
            self.logger.writeWarning("{} from aggregator {}".format(
                e, aggregator))
            raise ServerSideError

    def _send_request(self, method, aggregator, url_path, data=None):
        """Low level method to send a HTTP request"""

        url = urljoin(aggregator, url_path)
        self.logger.writeDebug("{} {}".format(method, url))

        # We give a long(ish) timeout below, as the async request may succeed after the timeout period
        # has expired, causing the node to be registered twice (potentially at different aggregators).
        # Whilst this isn't a problem in practice, it may cause excessive churn in websocket traffic
        # to web clients - so, sacrifice a little timeliness for things working as designed the
        # majority of the time...
        kwargs = {"method": method, "url": url, "json": data, "timeout": 1.0}
        if _config.get('prefer_ipv6') is True:
            kwargs["proxies"] = {'http': ''}

        # If not in OAuth mode, perform standard request
        if OAUTH_MODE is False or self.auth_client is None:
            return requests.request(**kwargs)
        else:
            # If in OAuth Mode, use OAuth client to automatically fetch token / refresh token if expired
            with self.auth_registry.app.app_context():
                try:
                    return self.auth_client.request(**kwargs)
                # General OAuth Error (e.g. incorrect request details, invalid client, etc.)
                except OAuth2Error as e:
                    self.logger.writeError(
                        "Failed to fetch token before making API call to {}. {}"
                        .format(url, e))
                    self.auth_registrar = self.auth_client = None
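
# --- Illustrative sketch (not part of the original example) ---
# The _send() docstring above defines a three-way outcome: 200/201/204/409
# succeed (409 meaning "already registered"), any other 4xx raises
# InvalidRequest (the request itself is bad, so retrying elsewhere won't help),
# and 5xx / timeouts / missing responses raise ServerSideError (fail over to
# another aggregator). A minimal standalone classifier of that contract; the
# two exception classes here are hypothetical stand-ins, not the real ones.

class InvalidRequestSketch(Exception):
    """Stand-in for InvalidRequest: a non-retryable client error."""

class ServerSideErrorSketch(Exception):
    """Stand-in for ServerSideError: retry against another aggregator."""

def classify_response_sketch(status_code):
    # Success codes, including 409, which _send() treats as success
    if status_code in (200, 201, 204, 409):
        return "ok"
    # Remaining 4xx: client error, do not retry the same request
    if status_code // 100 == 4:
        raise InvalidRequestSketch(status_code)
    # Everything else (5xx and unexpected codes): server-side failure
    raise ServerSideErrorSketch(status_code)

assert classify_response_sketch(409) == "ok"
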
class FacadeInterface(object):
    def __init__(self, registry, logger):
        self.host = Host(ADDRESS)
        self.registry = registry
        self.logger = Logger("facade_interface", logger)

        def getbases(cl):
            bases = list(cl.__bases__)
            for x in cl.__bases__:
                bases += getbases(x)
            return bases

        for cl in [self.__class__] + getbases(self.__class__):
            for name in cl.__dict__.keys():
                value = getattr(self, name)
                if callable(value):
                    if hasattr(value, "ipc_method"):
                        self.host.ipcmethod(name)(value)

    def start(self):
        self.host.start()

    def stop(self):
        self.host.stop()

    @ipcmethod
    def srv_register(self, name, srv_type, pid, href, proxy_path):
        self.logger.writeInfo("Service Register {}, {}, {}, {}, {}".format(
            name, srv_type, pid, href, proxy_path))
        return self.registry.register_service(name, srv_type, pid, href,
                                              proxy_path)

    # TODO: =None should be removed once proxying removed from node facade
    @ipcmethod
    def srv_update(self, name, pid, href, proxy_path):
        self.logger.writeInfo("Service Update {}, {}, {}, {}".format(
            name, pid, href, proxy_path))
        return self.registry.update_service(name, pid, href, proxy_path)

    @ipcmethod
    def srv_unregister(self, name, pid):
        self.logger.writeInfo("Service Unregister {}, {}".format(name, pid))
        return self.registry.unregister_service(name, pid)

    @ipcmethod
    def srv_heartbeat(self, name, pid):
        self.logger.writeDebug("Service Heartbeat {}, {}".format(name, pid))
        return self.registry.heartbeat_service(name, pid)

    @ipcmethod
    def res_register(self, name, pid, type, key, value):
        self.logger.writeInfo("Resource Register {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.register_resource(name, pid, type, key, value)

    @ipcmethod
    def res_update(self, name, pid, type, key, value):
        self.logger.writeInfo("Resource Update {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.update_resource(name, pid, type, key, value)

    @ipcmethod
    def res_unregister(self, name, pid, type, key):
        self.logger.writeInfo("Resource Unregister {} {} {} {}".format(
            name, pid, type, key))
        return self.registry.unregister_resource(name, pid, type, key)

    @ipcmethod
    def control_register(self, name, pid, device_id, control_data):
        self.logger.writeInfo("Control Register {} {} {} {}".format(
            name, pid, device_id, control_data))
        return self.registry.register_control(name, pid, device_id,
                                              control_data)

    @ipcmethod
    def control_unregister(self, name, pid, device_id, control_data):
        self.logger.writeInfo("Control Unregister {} {} {} {}".format(
            name, pid, device_id, control_data))
        return self.registry.unregister_control(name, pid, device_id,
                                                control_data)

    @ipcmethod
    def self_get(self, name, pid, api_version):
        return self.registry.list_self(api_version)

    @ipcmethod
    def status_get(self, name, pid):
        return self.registry.aggregator.status()

    @ipcmethod
    def clock_register(self, name, pid, clk_data):
        self.logger.writeInfo("Clock Register {} {}".format(name, pid))
        return self.registry.register_clock(clk_data)

    @ipcmethod
    def clock_update(self, name, pid, clk_data):
        self.logger.writeInfo("Clock Update {} {}".format(name, pid))
        return self.registry.update_clock(clk_data)

    @ipcmethod
    def clock_unregister(self, name, pid, clk_name):
        self.logger.writeInfo("Clock Unregister {} {}".format(name, pid))
        return self.registry.unregister_clock(clk_name)
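
# --- Illustrative sketch (not part of the original example) ---
# FacadeInterface.__init__ above walks the class and all of its bases and
# exposes every callable carrying an "ipc_method" marker over IPC. A minimal
# standalone version of that marker-and-scan pattern; ipcmethod_sketch and
# ExposedSketch are hypothetical names, not the real decorator or Host class.

def ipcmethod_sketch(func):
    func.ipc_method = True  # mark the function for later discovery
    return func

class ExposedSketch(object):
    def __init__(self):
        self.exposed = {}
        # Walk the MRO so methods marked on base classes are found too
        for cls in type(self).__mro__:
            for name in cls.__dict__:
                value = getattr(self, name)
                if callable(value) and hasattr(value, "ipc_method"):
                    self.exposed[name] = value

    @ipcmethod_sketch
    def ping(self):
        return "pong"

assert ExposedSketch().exposed["ping"]() == "pong"
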
class Facade(object):
    """This class serves as a proxy for the Facade running on the same machine if it exists. If no facade exists
    on this machine then it will do nothing, but calls will still function without throwing any exceptions."""
    def __init__(self, srv_type, address="ipc:///tmp/ips-nodefacade", logger=None):
        self.logger          = Logger("facade_proxy", logger)
        self.ipc             = None
        self.srv_registered  = False # Flag whether service is registered
        self.reregister      = False # Flag whether resources are correctly registered
        self.address         = address
        self.srv_type        = srv_type.lower()
        self.srv_type_urn    = "urn:x-nmos-opensourceprivatenamespace:service:" + self.srv_type
        self.pid             = os.getpid()
        self.resources       = {}
        self.timeline        = {}
        self.href            = None
        self.proxy_path      = None
        self.lock            = Lock() # Protect access to IPC socket

    def setup_ipc(self):
        with self.lock:
            try:
                self.ipc = Proxy(self.address)
            except Exception:
                self.ipc = None

    def register_service(self, href, proxy_path):
        self.logger.writeInfo("Register service")
        self.href = href
        self.proxy_path = proxy_path
        if not self.ipc:
            self.setup_ipc()
        if not self.ipc:
            return
        try:
            with self.lock:
                s = self.ipc.srv_register(self.srv_type, self.srv_type_urn, self.pid, href, proxy_path)
                if s == FAC_SUCCESS:
                    self.srv_registered = True
                else:
                    self.logger.writeInfo("Service registration failed: {}".format(self.debug_message(s)))
        except Exception as e:
            self.ipc = None

    def unregister_service(self):
        if not self.ipc:
            self.setup_ipc()
        if not self.ipc:
            return
        try:
            with self.lock:
                self.ipc.srv_unregister(self.srv_type, self.pid)
                self.srv_registered = False
        except Exception as e:
            self.ipc = None

    def heartbeat_service(self):
        if not self.ipc:
            self.setup_ipc()
        if not self.ipc:
            return
        try:
            with self.lock:
                s = self.ipc.srv_heartbeat(self.srv_type, self.pid)
                if s != FAC_SUCCESS:
                    self.srv_registered = False
                    self.logger.writeInfo("Heartbeat failed: {}".format(self.debug_message(s)))
                else:
                    self.srv_registered = True
            if not self.srv_registered or self.reregister:
                # Handle reconnection if facade disappears
                self.logger.writeInfo("Reregistering all services")
                self.reregister_all()
        except Exception as e:
            self.ipc = None

    # ONLY call this directly from within heartbeat_service!
    # To cause a re-registration on failure, set self.reregister!
    def reregister_all(self):
        self.unregister_service()
        if self.srv_registered:
            return
        self.register_service(self.href, self.proxy_path)
        if not self.srv_registered:
            return

        # TODO: the following blocks are so similar...

        # re-register resources
        for type in self.resources:
            for key in self.resources[type]:
                try:
                    with self.lock:
                        resource = self.resources[type][key]
                        # Hide some implementation details for receivers
                        if type == "receiver":
                            resource = deepcopy(self.resources[type][key])
                            if "pipel_id" in self.resources[type][key]:
                                resource.pop('pipel_id')
                            if "pipeline_id" in self.resources[type][key]:
                                resource.pop('pipeline_id')
                        self.ipc.res_register(self.srv_type, self.pid, type, key, resource)
                except Exception as e:
                    self.ipc = None
                    gevent.sleep(0)
                    return

        # re-register timeline items
        for type in self.timeline:
            for key in self.timeline[type]:
                try:
                    with self.lock:
                        self.ipc.timeline_register(self.srv_type, self.pid, type, key, self.timeline[type][key])
                except Exception as e:
                    self.ipc = None
                    gevent.sleep(0)
                    return

        self.reregister = False

    def _call_ipc_method(self, method, *args, **kwargs):
        if not self.srv_registered:
            # Don't attempt if not registered - will just hit many timeouts
            self.reregister = True
            return
        if not self.ipc:
            self.setup_ipc()
        if not self.ipc:
            self.reregister = True
            return
        try:
            with self.lock:
                return self.ipc.invoke_named(method, self.srv_type, self.pid, *args, **kwargs)
        except Exception as e:
            self.ipc = None
            self.reregister = True

    def addResource(self, type, key, value):
        if type not in self.resources:
            self.resources[type] = {}
        self.resources[type][key] = value
        self._call_ipc_method("res_register", type, key, value)

    def updateResource(self, type, key, value):
        if type not in self.resources:
            self.resources[type] = {}
        self.resources[type][key] = value
        self._call_ipc_method("res_update", type, key, value)

    def delResource(self, type, key):
        if type in self.resources:
            # Hack until adoption of flow instances (Ensure transports for flow are deleted)
            if type == "flow" and "transport" in self.resources:
                for transport in list(self.resources["transport"].keys()):
                    if key == self.resources["transport"][transport]["flow-id"]:
                        del self.resources["transport"][transport]
            if key in self.resources[type]:
                del self.resources[type][key]
        self._call_ipc_method("res_unregister", type, key)

    def addTimeline(self, type, key, value):
        if type not in self.timeline:
            self.timeline[type] = {}
        self.timeline[type][key] = value
        self._call_ipc_method("timeline_register", type, key, value)

    def updateTimeline(self, type, key, value):
        if type not in self.timeline:
            self.timeline[type] = {}
        self.timeline[type][key] = value
        self._call_ipc_method("timeline_update", type, key, value)

    def delTimeline(self, type, key):
        if type in self.timeline:
            if key in self.timeline[type]:
                del self.timeline[type][key]
        self._call_ipc_method("timeline_unregister", type, key)

    def get_node_self(self, api_version="v1.1"):
        return self._call_ipc_method("self_get", api_version)

    def debug_message(self, code):
        messages = {FAC_SUCCESS: "Success!",
                    FAC_EXISTS: "Service already exists",
                    FAC_UNREGISTERED: "Service isn't yet registered",
                    FAC_UNAUTHORISED: "Unauthorised",
                    FAC_UNSUPPORTED: "Unsupported",
                    FAC_OTHERERROR: "Other error"}
        return messages[code]
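
# --- Illustrative sketch (not part of the original example) ---
# Typical lifecycle of the Facade proxy above: register the service once, add
# resources as they appear, then heartbeat periodically so the node facade can
# detect the service going away (heartbeat_service() also re-registers
# everything if the facade restarts). Per the class docstring, every call
# degrades to a no-op when no facade is running. Names and values below are
# purely illustrative.

if __name__ == "__main__":  # pragma: no cover
    facade = Facade("myservice")
    facade.register_service("http://127.0.0.1:8080", "x-myservice/v1.0/")
    facade.addResource("device", "demo-device-id", {"id": "demo-device-id"})
    while True:
        facade.heartbeat_service()
        gevent.sleep(5)  # 5s matches the heartbeat cadence used elsewhere in this document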
Example #9
0
class ConnectionManagementService:

    def __init__(self, logger=None):
        self.running = False
        from nmoscommon.logger import Logger
        from nmosnode.facade import Facade
        self.logger = Logger("conmanage")
        self.logger.writeWarning("Could not find ipppython facade")
        self.facade = Facade("{}/{}".format(CONN_APINAME, CONN_APIVERSIONS[-1]),
                             address="ipc:///tmp/ips-nodefacade", logger=self.logger)
        self.logger.writeDebug("Running Connection Management Service")
        self.httpServer = HttpServer(ConnectionManagementAPI, WS_PORT,
                                     '0.0.0.0', api_args=[self.logger])

    def start(self):
        '''Call this to run the API without blocking'''
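        # Note: the handlers below are only installed when start() is reached
        # via run(), which sets self.running to True before calling start().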
        if self.running:
            gevent.signal_handler(signal.SIGINT, self.sig_handler)
            gevent.signal_handler(signal.SIGTERM, self.sig_handler)

        self.running = True

        self.httpServer.start()

        while not self.httpServer.started.is_set():
            self.logger.writeDebug('Waiting for httpserver to start...')
            self.httpServer.started.wait()

        if self.httpServer.failed is not None:
            raise self.httpServer.failed

        self.logger.writeDebug("Running on port: {}"
                               .format(self.httpServer.port))

        self.facade.register_service("http://127.0.0.1:{}".format(self.httpServer.port),
                                     "{}{}/".format(CONN_ROOT[1:], CONN_APIVERSIONS[-1]))
        try:
            from nmosconnectiondriver.httpIpstudioDriver import httpIpstudioDriver
            self.logger.writeInfo("Using ipstudio driver")
            # Start the IPStudio driver
            self.driver = httpIpstudioDriver(
                self.httpServer.api,
                self.logger,
                self.facade
            )
        except ImportError:
            # Start the mock driver
            self.driver = NmosDriver(
                self.httpServer.api,
                self.logger,
                self.facade
            )

    def run(self):
        '''Call this to run the API in keep-alive (blocking) mode'''
        self.running = True
        self.start()
        itercount = 0
        while self.running:
            gevent.sleep(1)
            itercount += 1
            if itercount == 5:
                self.facade.heartbeat_service()
                itercount = 0
        self._cleanup()

    def _cleanup(self):
        self.httpServer.stop()
        self.facade.unregister_service()

    def sig_handler(self):
        self.stop()

    def stop(self):
        '''Gracefully shut down the API'''
        self._cleanup()
        self.running = False
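
# --- Illustrative sketch (not part of the original example) ---
# Running the service in blocking mode: run() sets the running flag (so
# start() installs the signal handlers) and then heartbeats the facade every
# five seconds until stop() is called.

if __name__ == "__main__":  # pragma: no cover
    ConnectionManagementService().run()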
Example #10
0
class NodeFacadeService:
    def __init__(self, interactive=False):
        self.logger = Logger("facade", None)
        if HOST == "":
            self.logger.writeFatal(
                "Unable to start facade due to lack of connectivity")
            sys.exit(1)
        self.running = False
        self.httpServer = None
        self.interface = None
        self.interactive = interactive
        self.registry = None
        self.registry_cleaner = None
        self.node_id = None
        self.mdns = MDNSEngine()
        self.mappings = {
            "device": "ver_dvc",
            "flow": "ver_flw",
            "source": "ver_src",
            "sender": "ver_snd",
            "receiver": "ver_rcv",
            "self": "ver_slf"
        }
        self.mdns_updater = None
        self.auth_registry = AuthRegistry(app=None, scope=ALLOWED_SCOPE)

        self.protocol = PROTOCOL
        if PROTOCOL == "https":
            self.dns_sd_port = DNS_SD_HTTPS_PORT
        else:
            self.dns_sd_port = DNS_SD_HTTP_PORT
        if ENABLE_P2P:
            self.mdns_updater = MDNSUpdater(self.mdns,
                                            DNS_SD_TYPE,
                                            DNS_SD_NAME,
                                            self.mappings,
                                            self.dns_sd_port,
                                            self.logger,
                                            txt_recs=self._mdns_txt(
                                                NODE_APIVERSIONS,
                                                self.protocol, OAUTH_MODE))

        self.aggregator = Aggregator(self.logger, self.mdns_updater,
                                     self.auth_registry)

    def _mdns_txt(self, versions, protocol, oauth_mode):
        return {
            "api_ver": ",".join(versions),
            "api_proto": protocol,
            "api_auth": str(oauth_mode).lower()
        }
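
    # For example: _mdns_txt(["v1.2", "v1.3"], "https", True) yields the DNS-SD
    # TXT records {"api_ver": "v1.2,v1.3", "api_proto": "https", "api_auth": "true"}.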

    def sig_handler(self):
        print('Pressed ctrl+c')
        self.stop()

    def sig_hup_handler(self):
        if getLocalIP() != "":
            global HOST
            HOST = updateHost()
            self.registry.modify_node(href=self.generate_href(),
                                      host=HOST,
                                      api={
                                          "versions": NODE_APIVERSIONS,
                                          "endpoints":
                                          self.generate_endpoints()
                                      },
                                      interfaces=self.list_interfaces())

    def generate_endpoints(self):
        endpoints = []
        endpoints.append({
            "host": HOST,
            "port": self.dns_sd_port,  # Everything should go via apache proxy
            "protocol": self.protocol,
            "authorization": OAUTH_MODE
        })
        return endpoints

    def generate_href(self):
        return "{}://{}/".format(PROTOCOL, HOST)

    def list_interfaces(self):
        interfaces = {}
        # Initially populate interfaces from known-good location
        net_path = "/sys/class/net/"
        if os.path.exists(net_path):
            for interface_name in os.listdir(net_path):
                address_path = net_path + interface_name + "/address"
                if os.path.exists(address_path) and interface_name != 'lo':
                    with open(address_path, 'r') as address_file:
                        address = address_file.readline().strip('\n')
                        if address:
                            interfaces[interface_name] = {
                                "name": interface_name,
                                "chassis_id": None,
                                "port_id": address.lower().replace(":", "-")
                            }

        # Attempt to source proper LLDP data for interfaces
        if os.path.exists("/usr/sbin/lldpcli"):
            try:
                chassis_data = json.loads(
                    check_output(
                        ["/usr/sbin/lldpcli", "show", "chassis", "-f",
                         "json"]))
                chassis = list(chassis_data["local-chassis"]["chassis"].values())[0]
                chassis_id = chassis["id"]["value"]
                if chassis["id"]["type"] == "mac":
                    chassis_id = chassis_id.lower().replace(":", "-")
                interface_data = json.loads(
                    check_output([
                        "/usr/sbin/lldpcli", "show", "statistics", "-f", "json"
                    ]))
                if isinstance(interface_data["lldp"]["interface"], dict):
                    for interface_name in interface_data["lldp"][
                            "interface"].keys():
                        if interface_name in interfaces:
                            # Only correct the Chassis ID. Port ID MUST be a MAC address
                            interfaces[interface_name][
                                "chassis_id"] = chassis_id
                else:
                    for interface_block in interface_data["lldp"]["interface"]:
                        interface_name = list(interface_block.keys())[0]
                        if interface_name in interfaces:
                            # Only correct the Chassis ID. Port ID MUST be a MAC address
                            interfaces[interface_name][
                                "chassis_id"] = chassis_id
            except Exception:
                pass

        return list(itervalues(interfaces))

    def start(self):
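        # As in ConnectionManagementService.start() above, the handlers are
        # only installed when start() is reached via run(), which sets
        # self.running to True first.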
        if self.running:
            gevent.signal_handler(signal.SIGINT, self.sig_handler)
            gevent.signal_handler(signal.SIGTERM, self.sig_handler)
            gevent.signal_handler(signal.SIGHUP, self.sig_hup_handler)

        self.mdns.start()
        self.node_id = get_node_id()
        ptp_seconds, ptp_nanoseconds = ptptime.ptp_detail()[:2]
        node_version = "{}:{}".format(ptp_seconds, ptp_nanoseconds)
        node_data = {
            "id": self.node_id,
            "label": _config.get('node_label', FQDN),
            "description": _config.get('node_description', "Node on {}".format(FQDN)),
            "tags": _config.get('node_tags', {}),
            "href": self.generate_href(),
            "host": HOST,
            "services": [],
            "hostname": HOSTNAME,
            "caps": {},
            "version": node_version,
            "api": {
                "versions": NODE_APIVERSIONS,
                "endpoints": self.generate_endpoints(),
            },
            "clocks": [],
            "interfaces": self.list_interfaces()
        }
        self.registry = FacadeRegistry(self.mappings.keys(), self.aggregator,
                                       self.mdns_updater, self.node_id,
                                       node_data, self.logger)
        self.registry_cleaner = FacadeRegistryCleaner(self.registry)
        self.registry_cleaner.start()
        self.httpServer = HttpServer(
            FacadeAPI,
            PORT,
            '0.0.0.0',
            api_args=[self.registry, self.auth_registry])
        self.httpServer.start()
        while not self.httpServer.started.is_set():
            self.logger.writeInfo('Waiting for httpserver to start...')
            self.httpServer.started.wait()

        if self.httpServer.failed is not None:
            raise self.httpServer.failed

        self.logger.writeInfo("Running on port: {}".format(
            self.httpServer.port))

        try:
            self.logger.writeInfo("Registering as {}...".format(self.node_id))
            self.aggregator.register(
                'node', self.node_id,
                **translate_api_version(node_data, "node", NODE_REGVERSION))
        except Exception as e:
            self.logger.writeWarning("Could not register: {}".format(
                e.__repr__()))

        self.interface = FacadeInterface(self.registry, self.logger)
        self.interface.start()

    def run(self):
        self.running = True
        pidfile = "/tmp/ips-nodefacade.pid"
        with open(pidfile, 'w') as f:
            f.write(str(getpid()))
        self.start()
        daemon.notify(SYSTEMD_READY)
        while self.running:
            self.registry.update_ptp()
            time.sleep(1)
        os.unlink(pidfile)

    def _cleanup(self):
        try:
            self.logger.writeDebug("cleanup: unregister facade " +
                                   self.node_id)
            self.aggregator.unregister('node', self.node_id)
        except Exception as e:
            self.logger.writeWarning("Could not unregister: {}".format(e))

        if self.mdns:
            try:
                self.mdns.stop()
            except Exception as e:
                self.logger.writeWarning("Could not stop mdns: {}".format(e))

        self.registry_cleaner.stop()
        self.interface.stop()
        self.httpServer.stop()
        self.aggregator.stop()
        if self.mdns_updater is not None:
            self.mdns_updater.stop()
        self.logger.writeInfo("Stopped main()")

    def stop(self):
        self._cleanup()
        self.running = False
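
# --- Illustrative sketch (not part of the original example) ---
# Entry point for the node facade: run() writes a pidfile, starts mDNS, the
# HTTP server and the IPC interface, notifies systemd, then polls PTP state
# once a second until a signal triggers stop().

if __name__ == "__main__":  # pragma: no cover
    NodeFacadeService().run()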
Example #11
0
class QueryService(object):
    def __init__(self,
                 mdns_bridge,
                 logger=None,
                 apiversion=QUERY_APIVERSION,
                 priority=None):
        self.mdns_bridge = mdns_bridge
        self._query_url = self.mdns_bridge.getHref(QUERY_MDNSTYPE, priority)
        attempts = 0
        # TODO FIXME: Remove once IPv6 work complete and Python can use link-local v6 correctly
        while "fe80:" in self._query_url:
            self._query_url = self.mdns_bridge.getHref(QUERY_MDNSTYPE,
                                                       priority)
            attempts += 1
            if attempts > 20:
                break
        self.logger = Logger("nmoscommon.query", logger)
        self.apiversion = apiversion
        self.priority = priority

    def _get_query(self, url):
        backoff = [0.3, 0.7, 1.0]
        for try_i in range(len(backoff)):
            try:
                response = requests.get("{}/{}/{}/{}{}".format(
                    self._query_url, QUERY_APINAMESPACE, QUERY_APINAME,
                    self.apiversion, url))
                response.raise_for_status()
                return response
            except Exception as e:
                self.logger.writeWarning(
                    "Could not GET from query service at {}{}: {}".format(
                        self._query_url, url, e))
                if try_i == len(backoff) - 1:
                    raise QueryNotFoundError(e)

                # Back off before retrying (durations from the backoff list above)
                gevent.sleep(backoff[try_i])
                self._query_url = self.mdns_bridge.getHref(
                    QUERY_MDNSTYPE, self.priority)
                self.logger.writeInfo("Trying query at: {}".format(
                    self._query_url))

        # Shouldn't get this far, but don't return None
        raise QueryNotFoundError(
            "Could not find a query service (should be unreachable!)"
        )  # pragma: no cover

    def get_services(self, service_urn, node_id=None):
        """
        Look for nodes which contain a particular service type.
        Returns a list of found service objects, or an empty list on not-found.
        May raise a QueryNotFoundError exception if the query service can't be contacted.
        """
        response = self._get_query("/nodes/")
        if response.status_code != 200:
            self.logger.writeError(
                "Could not get /nodes/ from query service at {}".format(
                    self._query_url))
            return []

        nodes = response.json()

        services = []

        if node_id is None:
            services = itertools.chain.from_iterable(
                [n.get('services', []) for n in nodes])
        else:
            services = itertools.chain.from_iterable(
                [n.get('services', []) for n in nodes if n["id"] == node_id])

        return [s for s in services if s.get('type', 'unknown') == service_urn]

    def subscribe_topic(self, topic, on_event, on_open=None):
        """
        Subscribe to a query service topic, calling `on_event` for changes.
        Will block unless wrapped in a gevent greenlet:
            gevent.spawn(qs.subscribe_topic, "flows", on_event)
        If `on_open` is given, it will be called when the websocket is opened.
        """
        query_url = self.mdns_bridge.getHref(QUERY_MDNSTYPE, self.priority)

        if query_url == "":
            raise BadSubscriptionError(
                "Could not get query service from mDNS bridge")

        query_url = query_url + "/" + QUERY_APINAMESPACE + "/" + QUERY_APINAME + "/" + self.apiversion

        resource_path = "/" + topic.strip("/")
        params = {
            "max_update_rate_ms": 100,
            "persist": False,
            "resource_path": resource_path,
            "params": {}
        }
        r = requests.post(query_url + "/subscriptions",
                          data=json.dumps(params),
                          proxies={'http': ''})
        if r.status_code not in [200, 201]:
            raise BadSubscriptionError("{}: {}".format(r.status_code, r.text))

        r_json = r.json()
        if not "ws_href" in r_json:
            raise BadSubscriptionError(
                "Result has no 'ws_href': {}".format(r_json))

        assert (query_url.startswith("http://"))
        ws_href = r_json.get("ws_href")

        # handlers for websocket events
        def _on_open(*args):
            if on_open is not None:
                on_open()

        def _on_close(*args):
            pass

        def _on_message(*args):
            assert (len(args) >= 2)
            data = json.loads(args[1])
            events = data["grain"]["data"]
            if isinstance(events, dict):
                events = [events]
            for event in events:
                on_event(event)

        # Open websocket connection, and poll
        sock = websocket.WebSocketApp(ws_href,
                                      on_open=_on_open,
                                      on_message=_on_message,
                                      on_close=_on_close)
        if sock is None:
            raise BadSubscriptionError(
                "Could not open websocket at {}".format(ws_href))

        sock.run_forever()
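
# --- Illustrative sketch (not part of the original example) ---
# subscribe_topic() blocks in run_forever(), so spawn it in a greenlet as the
# docstring suggests. BridgeStub is a hypothetical stand-in for an mDNS bridge
# exposing getHref(); the URL it returns is illustrative only.

if __name__ == "__main__":  # pragma: no cover
    class BridgeStub(object):
        def getHref(self, mdns_type, priority=None):
            return "http://127.0.0.1:8870"

    def on_event(event):
        print("change: {}".format(event))

    qs = QueryService(BridgeStub())
    gevent.spawn(qs.subscribe_topic, "flows", on_event)
    gevent.wait()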
Example #12
0
class EtcdEventQueue(object):
    """
    Attempt to overcome the "missed etcd event" issue, which can be caused when
    processing of a return from a http long-poll takes too long, and an event is
    missed in etcd.

    This uses etcd's "waitIndex" functionality, which has the caveat that only
    the last 1000 events are stored. So, whilst this scheme should not miss events
    for fast updates, the case where 1000 updates occur within the space of a
    single event being processed will still be missed. This is unlikely, but still
    possible, so a "sentinel" message with action=index_skip will be sent to
    the output queue when this happens.

    To use this, the `queue` member of EtcdEventQueue is iterable:

    q = EtcdEventQueue(host, port)
    for message in q.queue:
        # process

    This uses http://www.gevent.org/gevent.queue.html as an underlying data
    structure, so can be consumed from multiple greenlets if necessary.
    """

    def __init__(self, host, port, logger=None):
        self.queue = gevent.queue.Queue()
        self._base_url = "http://{}:{}/v2/keys/resource/".format(host, port)
        self._long_poll_url = self._base_url + "?recursive=true&wait=true"
        self._alive = True
        self._logger = Logger("etcd_watch", logger)
        # Spawn last so the greenlet never sees a partially-initialised object
        self._greenlet = gevent.spawn(self._wait_event, 0)

    def _get_index(self, current_index):
        index = current_index
        try:
            response = requests.get(self._base_url, proxies={'http': ''}, timeout=1)
            if response is not None:
                if response.status_code == 200:
                    index = _get_etcd_index(response, self._logger)
                    self._logger.writeDebug("waitIndex now = {}".format(index))

                    # Always want to know if the index we were waiting on was greater
                    # than current index, as this indicates something that needs further
                    # investigation...
                    if index < current_index:
                        self._logger.writeWarning("Index decreased! {} -> {}".format(current_index, index))

                elif response.status_code in [400, 404]:
                    # '/resource' not found in etcd yet, back off for a second and set waitIndex to value of the x-etcd-index header
                    index = int(response.headers.get('x-etcd-index', 0))
                    self._logger.writeInfo("{} not found, wait... waitIndex={}".format(self._base_url, index))
                    gevent.sleep(1)

            else:
                # response was None...
                self._logger.writeWarning("Could not GET {} after timeout; waitIndex now=0".format(self._base_url))
                index = 0

        except Exception as ex:
            # Getting the new index failed, so reset to 0.
            self._logger.writeWarning("Reset waitIndex to 0, error: {}".format(ex))
            index = 0

        return index

    def _wait_event(self, since):
        current_index = since

        while self._alive:
            req = None
            try:
                # Make the long-poll request to etcd using the current
                # "waitIndex".  A timeout is used as situations have been
                # observed where the etcd modification index decreases (caused
                # by network partition or by a node having its data reset?),
                # and the query service is not restarted, hence the code below
                # is left waiting for a much higher modification index than it
                # should.  To mitigate this simply, when a timeout occurs,
                # assume that the modified index is "wrong", and forcibly try
                # to fetch the next index. This may "miss" updates, which is of
                # limited consequence. An enhancement (and therefore
                # complication...) could use the fact that the timeout is
                # small, and set waitIndex to the x-etcd-index result minus
                # some heuristically determined number of updates, to try and
                # catch the "back-in-time" updates stored in etcd's log, but
                # this feels brittle and overcomplicated for something that
                # could be solved by a browser refresh/handling of the "skip"
                # event to request a full set of resources.

                # https://github.com/coreos/etcd/blob/master/Documentation/api.md#waiting-for-a-change
                next_index_param = "&waitIndex={}".format(current_index + 1)
                req = requests.get(self._long_poll_url + next_index_param, proxies={'http': ''}, timeout=20)

            except (socket.timeout, requests.exceptions.Timeout):
                # Get a new wait index to watch from by querying /resource
                self._logger.writeDebug("Timeout waiting on long-poll. Refreshing waitIndex...")
                current_index = self._get_index(current_index)
                continue

            except Exception as ex:
                self._logger.writeWarning("Could not contact etcd: {}".format(ex))
                gevent.sleep(5)
                continue

            if req is not None:
                # Decode payload, which should be json...
                try:
                    payload = req.json()

                except Exception:
                    self._logger.writeError("Error decoding payload: {}".format(req.text))
                    continue

                if req.status_code == 200:
                    # Return from request was OK, so put the payload on the queue.
                    # NOTE: we use the "modifiedIndex" of the _node_ we receive, NOT the header.
                    # This follows the etcd docs linked above.
                    self.queue.put(payload)
                    current_index = payload.get('node', {}).get('modifiedIndex', current_index)

                else:
                    # Error codes documented here:
                    #  https://github.com/coreos/etcd/blob/master/Documentation/errorcode.md
                    self._logger.writeInfo("error: http:{}, etcd:{}".format(req.status_code, payload.get('errorCode', 0)))
                    if payload.get('errorCode', 0) == 401:
                        # Index has been cleared. This may cause missed events, so send an (invented) sentinel message to queue.
                        new_index = self._get_index(current_index)
                        self._logger.writeWarning("etcd history not available; skipping {} -> {}".format(current_index, new_index))
                        self.queue.put({'action': 'index_skip', 'from': current_index, 'to': new_index})
                        current_index = new_index

    def stop(self):
        self._logger.writeInfo("Stopping service")
        print "stopping"
        self._alive = False
        self._greenlet.kill(timeout=5)
        self.queue.put(StopIteration)
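
# --- Illustrative sketch (not part of the original example) ---
# Consuming the queue: iteration ends cleanly after stop(), because putting
# StopIteration into a gevent queue acts as an end-of-stream sentinel. The
# index_skip sentinel documented in the class docstring signals that events
# may have been missed, so consumers should re-fetch a full snapshot. The
# etcd host/port below are deployment-specific placeholders.

if __name__ == "__main__":  # pragma: no cover
    q = EtcdEventQueue("127.0.0.1", 4001)
    for message in q.queue:
        if message.get("action") == "index_skip":
            print("events may have been missed; refresh full state")
        else:
            print("etcd event: {}".format(message.get("action")))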
class FacadeInterface(object):
    def __init__(self, registry, logger):
        self.host = Host(ADDRESS)
        self.registry = registry
        self.logger = Logger("facade_interface", logger)

        def getbases(cl):
            bases = list(cl.__bases__)
            for x in cl.__bases__:
                bases += getbases(x)
            return bases

        for cl in [self.__class__] + getbases(self.__class__):
            for name in cl.__dict__.keys():
                value = getattr(self, name)
                if callable(value):
                    if hasattr(value, "ipc_method"):
                        self.host.ipcmethod(name)(value)

    def start(self):
        self.host.start()

    def stop(self):
        self.host.stop()

    @ipcmethod
    def srv_register(self, name, srv_type, pid, href, proxy_path):
        self.logger.writeInfo("Service Register {}, {}, {}, {}, {}".format(
            name, srv_type, pid, href, proxy_path))
        return self.registry.register_service(name, srv_type, pid, href,
                                              proxy_path)

    @ipcmethod
    def srv_update(self, name, pid, href, proxy_path):
        self.logger.writeInfo("Service Update {}, {}, {}, {}".format(
            name, pid, href, proxy_path))
        return self.registry.update_service(name, pid, href, proxy_path)

    @ipcmethod
    def srv_unregister(self, name, pid):
        self.logger.writeInfo("Service Unregister {}, {}".format(name, pid))
        return self.registry.unregister_service(name, pid)

    @ipcmethod
    def srv_heartbeat(self, name, pid):
        self.logger.writeDebug("Service Heartbeat {}, {}".format(name, pid))
        return self.registry.heartbeat_service(name, pid)

    @ipcmethod
    def res_register(self, name, pid, type, key, value):
        self.logger.writeInfo("Resource Register {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.register_resource(name, pid, type, key, value)

    @ipcmethod
    def res_update(self, name, pid, type, key, value):
        self.logger.writeInfo("Resource Update {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.update_resource(name, pid, type, key, value)

    @ipcmethod
    def res_unregister(self, name, pid, type, key):
        self.logger.writeInfo("Resource Unregister {} {} {} {}".format(
            name, pid, type, key))
        return self.registry.unregister_resource(name, pid, type, key)

    @ipcmethod
    def timeline_register(self, name, pid, type, key, value):
        self.logger.writeInfo("Timeline Register {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.register_to_timeline(name, pid, type, key, value)

    @ipcmethod
    def timeline_update(self, name, pid, type, key, value):
        self.logger.writeInfo("Timeline Update {} {} {} {} {}".format(
            name, pid, type, key, value))
        return self.registry.update_timeline(name, pid, type, key, value)

    @ipcmethod
    def timeline_unregister(self, name, pid, type, key):
        self.logger.writeInfo("Timeline Unregister {} {} {} {}".format(
            name, pid, type, key))
        return self.registry.unregister_from_timeline(name, pid, type, key)

    @ipcmethod
    def self_get(self, name, pid, api_version):
        return self.registry.list_self(api_version)