Ejemplo n.º 1
0
    def check_preconditions(self):
        """
        Evaluate the configured start preconditions and set the
        preconditions_true event once all of them hold.

        Reads "engine.await_preconditions" from the PD config. Supported keys:
        "min_engines" (minimum number of containers in running state) and
        "engines_exist" (names that must be present among running containers).
        Does nothing if the event is already set.
        """
        if self.preconditions_true.is_set():
            return

        precond_cfg = get_safe(self._pd_core.pd_cfg,
                               "engine.await_preconditions") or {}
        satisfied = True
        running = [info for info in self._containers.values()
                   if info["state"] == EE_STATE_RUNNING]

        # Minimum number of running containers (a falsy value disables the check)
        required_count = precond_cfg.get("min_engines", 0)
        if required_count and len(running) < required_count:
            satisfied = False

        # All named engines must be among the running ones
        required_names = precond_cfg.get("engines_exist", None)
        if required_names:
            running_names = set(get_safe(info, "ee_info.name", "")
                                for info in running)
            satisfied = satisfied and running_names.issuperset(
                set(required_names))

        if not satisfied:
            return

        log.info("ProcessDispatcher start preconditions now True")
        self.preconditions_true.set()
Ejemplo n.º 2
0
    def on_init(self):
        """
        Initialize the ingestion process from its configured profile.

        Loads the profile config, instantiates the configured plugin, sets up
        the default persistence layer, registers all configured stream
        subscriptions and finally starts the stream subscriber.

        Raises BadRequest if no config exists for the selected profile.
        """
        profile = self.CFG.get_safe(CONFIG_KEY + ".ingestion_profile", "default")
        self.ingestion_profile = profile

        log.info("Ingestion starting using profile '%s'", profile)
        self.exchange_name = "ingestion_process"

        profile_cfg = self.CFG.get_safe(CONFIG_KEY + ".profile_" + profile) or {}
        self.ingestion_config = profile_cfg
        if not profile_cfg:
            raise BadRequest("No config found for profile '%s'" % self.ingestion_profile)

        # Instantiate the plugin class named in the profile, passing this process
        plugin_path = get_safe(profile_cfg, "plugin")
        plugin_factory = named_any(plugin_path)
        self.plugin = plugin_factory(self)
        log.info("Started ingestion plugin '%s'", plugin_path)

        # Persistence bookkeeping; the default format is required up front
        self.persistence_formats = {}
        self.persistence_objects = {}
        self.default_persistence_format = get_safe(profile_cfg, "persist.persistence_format")
        self._require_persistence_layer(self.default_persistence_format)

        self.stream_sub = StreamSubscriber(
            process=self,
            exchange_name=self.exchange_name,
            callback=self.process_package,
        )
        for route in get_safe(profile_cfg, "stream_subscriptions") or []:
            # A list entry is [exchange_point, routing_key]
            if isinstance(route, list):
                route = StreamRoute(exchange_point=route[0], routing_key=route[1])

            log.info("Ingestion subscribed to stream '%s'", route)
            self.stream_sub.add_stream_subscription(route)

        self.plugin.on_init()

        self.stream_sub.start()
Ejemplo n.º 3
0
    def on_init(self):
        """
        Set up ingestion: resolve the active profile, create the plugin,
        prepare persistence and subscribe to the configured streams.

        Raises BadRequest when the selected profile has no configuration.
        """
        self.ingestion_profile = self.CFG.get_safe(CONFIG_KEY + ".ingestion_profile", "default")
        log.info("Ingestion starting using profile '%s'", self.ingestion_profile)
        self.exchange_name = "ingestion_process"

        # Profile-specific configuration is mandatory
        config_path = CONFIG_KEY + ".profile_" + self.ingestion_profile
        self.ingestion_config = self.CFG.get_safe(config_path) or {}
        if not self.ingestion_config:
            raise BadRequest("No config found for profile '%s'" % self.ingestion_profile)
        ingest_cfg = self.ingestion_config

        # Plugin is looked up by its configured name and constructed with this process
        plugin_name = get_safe(ingest_cfg, "plugin")
        self.plugin = named_any(plugin_name)(self)
        log.info("Started ingestion plugin '%s'", plugin_name)

        self.persistence_formats = {}
        self.persistence_objects = {}
        self.default_persistence_format = get_safe(ingest_cfg, "persist.persistence_format")
        self._require_persistence_layer(self.default_persistence_format)

        subscriber = StreamSubscriber(process=self,
                                      exchange_name=self.exchange_name,
                                      callback=self.process_package)
        self.stream_sub = subscriber

        subscriptions = get_safe(ingest_cfg, "stream_subscriptions") or []
        for entry in subscriptions:
            # List entries are converted into a StreamRoute; others are used as given
            if isinstance(entry, list):
                entry = StreamRoute(exchange_point=entry[0], routing_key=entry[1])

            log.info("Ingestion subscribed to stream '%s'", entry)
            subscriber.add_stream_subscription(entry)

        self.plugin.on_init()

        subscriber.start()
Ejemplo n.º 4
0
 def __init__(self, pd_core):
     """Create the executor's queue, quit event and size-limited pool.

     The pool size comes from "executor.pool_size" in the PD config,
     defaults to 1 when unset or falsy, and is capped at 10.
     """
     self._pd_core = pd_core
     self.container = pd_core.container
     self.queue = Queue()
     self.quit_event = Event()

     configured_size = int(get_safe(pd_core.pd_cfg, "executor.pool_size") or 1)
     self.exec_pool_size = min(configured_size, 10)
     self.exec_pool = Pool(size=self.exec_pool_size)
Ejemplo n.º 5
0
    def start(self):
        """
        Create the command subscriber and the result publisher.

        The queue name is read from "command_queue" in the PD config and
        defaults to "pd_command". The subscriber's listen call is spawned with
        activate=False, and this method blocks until its ready event is set.
        """
        command_queue = get_safe(self._pd_core.pd_cfg, "command_queue") or "pd_command"

        subscriber = Subscriber(binding=command_queue,
                                from_name=command_queue,
                                callback=self._receive_command)
        self.sub_cont = subscriber
        self.sub_cont_gl = spawn(subscriber.listen, activate=False)
        subscriber.get_ready_event().wait()

        self.pub_result = Publisher()
Ejemplo n.º 6
0
 def __init__(self, pd_core):
     """Initialize executor state: queue, quit event and execution pool.

     Pool size is taken from the PD config key "executor.pool_size"
     (1 if missing or falsy) and limited to at most 10.
     """
     self._pd_core = pd_core
     self.container = self._pd_core.container
     self.queue = Queue()
     self.quit_event = Event()

     pool_size_cfg = get_safe(self._pd_core.pd_cfg, "executor.pool_size")
     requested = int(pool_size_cfg or 1)
     self.exec_pool_size = min(requested, 10)
     self.exec_pool = Pool(size=self.exec_pool_size)
Ejemplo n.º 7
0
    def check_preconditions(self):
        """
        Check the configured await-preconditions against the currently running
        containers; set the preconditions_true event when they are all met.

        No-op when the event has already been set.
        """
        if self.preconditions_true.is_set():
            return

        conditions = get_safe(self._pd_core.pd_cfg, "engine.await_preconditions") or {}
        all_met = True
        running_ees = [ee for ee in self._containers.values() if ee["state"] == EE_STATE_RUNNING]

        # "min_engines": enough containers must be running (skipped when falsy)
        minimum = conditions.get("min_engines", 0)
        if minimum and len(running_ees) < minimum:
            all_met = False

        # "engines_exist": every listed name must belong to a running container
        expected_names = conditions.get("engines_exist", None)
        if expected_names:
            present_names = set(get_safe(ee, "ee_info.name", "") for ee in running_ees)
            all_met = all_met and present_names.issuperset(set(expected_names))

        if not all_met:
            return

        log.info("ProcessDispatcher start preconditions now True")
        self.preconditions_true.set()
Ejemplo n.º 8
0
    def _determine_target_container(self, app_cfg, target_engine):
        """
        Pick the container that should run the given app.

        If target_engine is None it is resolved via _determine_target_engine.
        Candidate containers of that engine are ordered by "ts_created" and one
        is chosen according to the "engine.dispatch_spread" config setting
        (default "round_robin"):

        - "round_robin": first container with the fewest allocations that
          still has free capacity (capacity.max of 0/unset counts as none)
        - "fill_up": first container with allocations below capacity.max
        - "random": any container, capacity is not consulted

        Raises BadRequest if the engine has no containers, no open slot is
        found, or the dispatch_spread value is unknown.
        """
        app_name = app_cfg["name"]
        if target_engine is None:
            target_engine = self._determine_target_engine(app_cfg)

        # Containers registered for the target engine, oldest first
        engine_conts = self.registry.get_engine_containers().get(target_engine, None)
        if not engine_conts:
            raise BadRequest("No running containers for app {}".format(app_name))
        candidates = sorted(engine_conts, key=lambda c: c["ts_created"])

        # Spread strategy
        dispatch_spread = get_safe(self._pd_core.pd_cfg, "engine.dispatch_spread") or "round_robin"
        if dispatch_spread == "round_robin":
            cont_info = None
            lowest_load = 9999999
            for candidate in candidates:
                load = len(candidate["allocation"])
                if load >= lowest_load:
                    continue
                capacity = int(get_safe(candidate["ee_info"], "capacity.max", 0))
                if capacity and load < capacity:
                    cont_info = candidate
                    lowest_load = load
            if not cont_info:
                raise BadRequest("Could not find open slot")
        elif dispatch_spread == "fill_up":
            cont_info = next(
                (candidate for candidate in candidates
                 if len(candidate["allocation"]) < int(get_safe(candidate["ee_info"], "capacity.max", 0))),
                None)
            if not cont_info:
                raise BadRequest("Could not find open slot")
        elif dispatch_spread == "random":
            cont_info = candidates[random.randint(0, len(candidates)-1)]
        else:
            raise BadRequest("Unknown dispatch_spread {}".format(dispatch_spread))

        return cont_info
Ejemplo n.º 9
0
            def start_sub():
                """
                Wait (up to the configured timeout) for the registry
                preconditions, then activate the command subscriber if this PD
                is leader, a subscriber exists and it is not yet active.
                """
                if not self.registry.preconditions_true.is_set():
                    log.info("PD is leader - awaiting PD preconditions")
                    # Block until the preconditions event is set or the timeout expires
                    wait_secs = get_safe(self._pd_core.pd_cfg, "engine.await_preconditions.await_timeout")
                    if not self.registry.preconditions_true.wait(timeout=wait_secs):
                        log.warn("PD preconditions not satisfied after timeout - continuing")

                # Re-check leadership and activation state after the wait
                if not self._pd_core.is_leader() or self.sub_cont is None or self.sub_active:
                    return

                pending, consumers = self.sub_cont.get_stats()
                log.info("PD is leader - starting to consume (%s pending commands, %s consumers)", pending, consumers)
                self.sub_cont.activate()
                self.sub_active = True
Ejemplo n.º 10
0
    def start(self):
        """
        Start the aggregator's two subscribers and wait until each is ready.

        One subscriber binds a per-container, auto-delete queue to the
        container topic ("aggregator.container_topic", default "bx_containers");
        the other subscribes to container and process lifecycle events.
        """
        # Queue name is derived from the topic and this container's id
        container_topic = get_safe(self._pd_core.pd_cfg, "aggregator.container_topic") or "bx_containers"
        container_ident = create_valid_identifier(self.container.id, dot_sub="_")
        own_queue = "pd_aggregator_%s_%s" % (container_topic, container_ident)

        self.sub_cont = Subscriber(
            binding=container_topic,
            from_name=own_queue,
            auto_delete=True,
            callback=self._receive_container_info,
        )
        self.sub_cont_gl = spawn(self.sub_cont.listen)
        self.sub_cont.get_ready_event().wait()

        event_subscriber = EventSubscriber(event_type=OT.ContainerLifecycleEvent, callback=self._receive_event)
        self.evt_sub = event_subscriber
        event_subscriber.add_event_subscription(event_type=OT.ProcessLifecycleEvent)
        self.evt_sub_gl = spawn(event_subscriber.listen)
        event_subscriber.get_ready_event().wait()

        log.info("PD Aggregator - event and heartbeat subscribers started")
Ejemplo n.º 11
0
    def start(self):
        """
        Bring up the container-topic subscriber and the lifecycle event
        subscriber, blocking until both report ready.
        """
        # Dedicated queue for this container, bound to the container topic
        pd_cfg = self._pd_core.pd_cfg
        topic_name = get_safe(pd_cfg, "aggregator.container_topic") or "bx_containers"
        queue_suffix = create_valid_identifier(self.container.id, dot_sub="_")
        queue_name = "pd_aggregator_%s_%s" % (topic_name, queue_suffix)

        cont_subscriber = Subscriber(binding=topic_name, from_name=queue_name,
                                     auto_delete=True,
                                     callback=self._receive_container_info)
        self.sub_cont = cont_subscriber
        self.sub_cont_gl = spawn(cont_subscriber.listen)
        cont_subscriber.get_ready_event().wait()

        # Container lifecycle events, plus process lifecycle events
        self.evt_sub = EventSubscriber(event_type=OT.ContainerLifecycleEvent,
                                       callback=self._receive_event)
        self.evt_sub.add_event_subscription(event_type=OT.ProcessLifecycleEvent)
        self.evt_sub_gl = spawn(self.evt_sub.listen)
        self.evt_sub.get_ready_event().wait()

        log.info("PD Aggregator - event and heartbeat subscribers started")
Ejemplo n.º 12
0
    def __init__(self, container, config):
        """
        Wire up the process dispatcher core and its components.

        container: the hosting container, kept and handed to sub-components.
        config: PD configuration dict; a falsy value is replaced by {}.

        Components are created in a fixed order, each (except the leader
        manager and client) receiving this core instance as pd_core.
        """
        self.container = container
        self.pd_cfg = config or {}
        self._enabled = False

        # Region scopes the PD; falls back to "default" when not configured
        region = get_safe(self.pd_cfg, "container.execution_engine.deployment.region") or "default"
        self.pd_region = region
        self.pd_scope = "PD_{}".format(region)

        # Leader determination within the PD scope
        self.leader_manager = LeaderManager(self.pd_scope, container=self.container)

        # Process registry
        self.registry = ProcessDispatcherRegistry(pd_core=self)

        # Aggregator for external input
        self.aggregator = ProcessDispatcherAggregator(pd_core=self)

        # Executor of actions, created through the factory with the "global" kind
        self.executor = pd_executor_factory("global", pd_core=self)

        # Decision engine
        self.engine = ProcessDispatcherDecisionEngine(pd_core=self)

        self.pd_client = ProcessDispatcherClient(self.container)
Ejemplo n.º 13
0
 def _load_rules(self):
     """Load dispatch rules (default []) and default engine name (default "default") from the PD config."""
     pd_cfg = self._pd_core.pd_cfg
     configured_rules = get_safe(pd_cfg, "engine.dispatch_rules")
     self.rules_cfg = configured_rules or []
     configured_engine = get_safe(self._pd_core.pd_cfg, "engine.default_engine")
     self.default_engine = configured_engine or "default"
Ejemplo n.º 14
0
    def create_object_from_cfg(self, cfg, objtype, key="resource", prefix="", existing_obj=None):
        """
        Construct an IonObject of a determined type from given config dict with attributes.
        Convert all attributes according to their schema target type. Supports nested objects.
        Supports edit of objects of same type.

        The attribute mapping is read from cfg at `key` (via get_safe). Only entries whose
        name starts with `prefix` are considered; the prefix is stripped to get the attribute.

        - Attribute names containing '.' denote a nested object. The part before the first
          '.' is the field name; its type comes from a "field[Type]" suffix, is "dict" when
          the parent is a dict, or otherwise is looked up in the parent schema. A type of the
          form "list/<index>/<type>" stores the nested object at that index of a list field.
          Nested objects are built by a recursive call with the extended prefix.
        - For objtype "dict", leaf values are copied unconverted and a plain dict is returned.
        - Otherwise leaf attributes present in the schema are converted with get_typed_value
          (only if the value is truthy); conversion failures and unknown attributes are
          logged and skipped.

        If existing_obj is given, its type_ must equal objtype (else Inconsistent is raised);
        falsy fields and 'alt_ids', '_id', '_rev', 'type_' are dropped and the remaining
        fields are written into existing_obj, which is returned. Otherwise a new IonObject
        is created; when cfg has a KEY_ID value and the schema has 'alt_ids', the entry
        "PRE:" + id is added to alt_ids.
        """
        log.trace("Create object type=%s, prefix=%s", objtype, prefix)
        if objtype == "dict":
            # Plain dicts have no schema; values are taken as-is
            schema = None
        else:
            schema = self._get_object_class(objtype)._schema
        obj_fields = {}         # Attributes for IonObject creation as dict
        nested_done = set()      # Names of attributes with nested objects already created
        obj_cfg = get_safe(cfg, key)
        for subkey, value in obj_cfg.iteritems():
            if subkey.startswith(prefix):
                attr = subkey[len(prefix):]
                if '.' in attr:     # We are a parent entry
                    # TODO: Make sure to not create nested object multiple times
                    slidx = attr.find('.')
                    nested_obj_field = attr[:slidx]
                    parent_field = attr[:slidx+1]    # field name including the trailing '.'
                    nested_prefix = prefix + parent_field    # prefix plus nested object name
                    if '[' in nested_obj_field and nested_obj_field[-1] == ']':
                        # Explicit type given as "field[Type]"
                        sqidx = nested_obj_field.find('[')
                        nested_obj_type = nested_obj_field[sqidx+1:-1]
                        nested_obj_field = nested_obj_field[:sqidx]
                    elif objtype == "dict":
                        nested_obj_type = "dict"
                    else:
                        nested_obj_type = schema[nested_obj_field]['type']

                    # Make sure to not create the same nested object twice
                    if parent_field in nested_done:
                        continue

                    # Support direct indexing in a list
                    list_idx = -1
                    if nested_obj_type.startswith("list/"):
                        # Type has the form "list/<index>/<element type>"
                        _, list_idx, nested_obj_type = nested_obj_type.split("/")
                        list_idx = int(list_idx)

                    log.trace("Get nested object field=%s type=%s, prefix=%s", nested_obj_field, nested_obj_type, prefix)
                    # Recurse over the same cfg, restricted to entries under the nested prefix
                    nested_obj = self.create_object_from_cfg(cfg, nested_obj_type, key, nested_prefix)

                    if list_idx >= 0:
                        my_list = obj_fields.setdefault(nested_obj_field, [])
                        if list_idx >= len(my_list):
                            # Pad with None so that list_idx becomes a valid index
                            my_list[len(my_list):list_idx] = [None]*(list_idx-len(my_list)+1)
                        my_list[list_idx] = nested_obj
                    else:
                        obj_fields[nested_obj_field] = nested_obj

                    nested_done.add(parent_field)

                elif objtype == "dict":
                    # TODO: What about type?
                    obj_fields[attr] = value

                elif attr in schema:    # We are the leaf attribute
                    try:
                        # Falsy values are skipped, leaving the field unset here
                        if value:
                            fieldvalue = get_typed_value(value, schema[attr])
                            obj_fields[attr] = fieldvalue
                    except Exception:
                        log.warn("Object type=%s, prefix=%s, field=%s cannot be converted to type=%s. Value=%s",
                            objtype, prefix, attr, schema[attr]['type'], value, exc_info=True)
                        #fieldvalue = str(fieldvalue)
                else:
                    # warn about unknown fields just once -- not on each row
                    # NOTE(review): no de-duplication is visible here; this logs on every
                    # call that encounters the unknown field - confirm intent
                    log.warn("Skipping unknown field in %s: %s%s", objtype, prefix, attr)

        if objtype == "dict":
            obj = obj_fields
        else:
            if existing_obj:
                # Edit attributes
                if existing_obj.type_ != objtype:
                    raise Inconsistent("Cannot edit resource. Type mismatch old=%s, new=%s" % (existing_obj.type_, objtype))
                # TODO: Don't edit empty nested attributes
                # Drop falsy values so they do not overwrite existing attributes
                for attr in list(obj_fields.keys()):
                    if not obj_fields[attr]:
                        del obj_fields[attr]
                # These attributes are never overwritten on an existing object
                for attr in ('alt_ids','_id','_rev','type_'):
                    if attr in obj_fields:
                        del obj_fields[attr]
                existing_obj.__dict__.update(obj_fields)
                log.trace("Update object type %s using field names %s", objtype, obj_fields.keys())
                obj = existing_obj
            else:
                # Record the config's id as an alternate id with "PRE:" prefix
                if cfg.get(KEY_ID, None) and 'alt_ids' in schema:
                    if 'alt_ids' in obj_fields:
                        obj_fields['alt_ids'].append("PRE:"+cfg[KEY_ID])
                    else:
                        obj_fields['alt_ids'] = ["PRE:"+cfg[KEY_ID]]

                log.trace("Create object type %s from field names %s", objtype, obj_fields.keys())
                obj = IonObject(objtype, **obj_fields)
        return obj
Ejemplo n.º 15
0
    def create_object_from_cfg(self, cfg, objtype, key="resource", prefix="", existing_obj=None):
        """
        Construct an IonObject of a determined type from given config dict with attributes.
        Convert all attributes according to their schema target type. Supports nested objects.
        Supports edit of objects of same type.

        Parameters:
          cfg          - config dict; the attribute mapping is read from it at `key`
          objtype      - target object type name, or "dict" for a plain dict result
          key          - path within cfg holding the attribute mapping (default "resource")
          prefix       - only attributes starting with this prefix are used (prefix is stripped);
                         used by the recursive calls for nested objects
          existing_obj - if given, this object is updated in place instead of creating a new one

        Returns the created IonObject, the updated existing_obj, or a dict for objtype "dict".
        Raises Inconsistent if existing_obj is of a different type than objtype.
        """
        log.trace("Create object type=%s, prefix=%s", objtype, prefix)
        if objtype == "dict":
            # No schema for plain dicts
            schema = None
        else:
            schema = self._get_object_class(objtype)._schema
        obj_fields = {}         # Attributes for IonObject creation as dict
        nested_done = set()      # Names of attributes with nested objects already created
        obj_cfg = get_safe(cfg, key)
        for subkey, value in obj_cfg.iteritems():
            if subkey.startswith(prefix):
                attr = subkey[len(prefix):]
                if '.' in attr:     # We are a parent entry
                    # TODO: Make sure to not create nested object multiple times
                    slidx = attr.find('.')
                    nested_obj_field = attr[:slidx]
                    parent_field = attr[:slidx+1]    # includes the trailing '.'
                    nested_prefix = prefix + parent_field    # prefix plus nested object name
                    # Determine the nested type: "field[Type]" suffix, dict parent, or parent schema
                    if '[' in nested_obj_field and nested_obj_field[-1] == ']':
                        sqidx = nested_obj_field.find('[')
                        nested_obj_type = nested_obj_field[sqidx+1:-1]
                        nested_obj_field = nested_obj_field[:sqidx]
                    elif objtype == "dict":
                        nested_obj_type = "dict"
                    else:
                        nested_obj_type = schema[nested_obj_field]['type']

                    # Make sure to not create the same nested object twice
                    if parent_field in nested_done:
                        continue

                    # Support direct indexing in a list
                    # (type written as "list/<index>/<element type>"; -1 means not a list slot)
                    list_idx = -1
                    if nested_obj_type.startswith("list/"):
                        _, list_idx, nested_obj_type = nested_obj_type.split("/")
                        list_idx = int(list_idx)

                    log.trace("Get nested object field=%s type=%s, prefix=%s", nested_obj_field, nested_obj_type, prefix)
                    # Build the nested object from the entries below nested_prefix
                    nested_obj = self.create_object_from_cfg(cfg, nested_obj_type, key, nested_prefix)

                    if list_idx >= 0:
                        my_list = obj_fields.setdefault(nested_obj_field, [])
                        if list_idx >= len(my_list):
                            # Grow the list with None placeholders up to and including list_idx
                            my_list[len(my_list):list_idx] = [None]*(list_idx-len(my_list)+1)
                        my_list[list_idx] = nested_obj
                    else:
                        obj_fields[nested_obj_field] = nested_obj

                    nested_done.add(parent_field)

                elif objtype == "dict":
                    # TODO: What about type?
                    obj_fields[attr] = value

                elif attr in schema:    # We are the leaf attribute
                    try:
                        # Only truthy values are converted and set
                        if value:
                            fieldvalue = get_typed_value(value, schema[attr])
                            obj_fields[attr] = fieldvalue
                    except Exception:
                        # Conversion failure is logged with traceback; the field stays unset
                        log.warn("Object type=%s, prefix=%s, field=%s cannot be converted to type=%s. Value=%s",
                            objtype, prefix, attr, schema[attr]['type'], value, exc_info=True)
                        #fieldvalue = str(fieldvalue)
                else:
                    # warn about unknown fields just once -- not on each row
                    # NOTE(review): nothing here limits this to once; the warning is emitted
                    # each time an unknown field is seen - confirm intent
                    log.warn("Skipping unknown field in %s: %s%s", objtype, prefix, attr)

        if objtype == "dict":
            obj = obj_fields
        else:
            if existing_obj:
                # Edit attributes
                if existing_obj.type_ != objtype:
                    raise Inconsistent("Cannot edit resource. Type mismatch old=%s, new=%s" % (existing_obj.type_, objtype))
                # TODO: Don't edit empty nested attributes
                # Remove falsy values; iterate over a copy of the keys since entries are deleted
                for attr in list(obj_fields.keys()):
                    if not obj_fields[attr]:
                        del obj_fields[attr]
                # Identity and bookkeeping attributes are excluded from the update
                for attr in ('alt_ids','_id','_rev','type_'):
                    if attr in obj_fields:
                        del obj_fields[attr]
                existing_obj.__dict__.update(obj_fields)
                log.trace("Update object type %s using field names %s", objtype, obj_fields.keys())
                obj = existing_obj
            else:
                # Add "PRE:<id>" to alt_ids when cfg carries an id and the schema supports alt_ids
                if cfg.get(KEY_ID, None) and 'alt_ids' in schema:
                    if 'alt_ids' in obj_fields:
                        obj_fields['alt_ids'].append("PRE:"+cfg[KEY_ID])
                    else:
                        obj_fields['alt_ids'] = ["PRE:"+cfg[KEY_ID]]

                log.trace("Create object type %s from field names %s", objtype, obj_fields.keys())
                obj = IonObject(objtype, **obj_fields)
        return obj