def start(self):
    """
    Starts all the actors to run a system of process applications.

    One ``RayProcess`` actor is created for each (process name, pipeline ID)
    pair. Each actor's ``init`` is then invoked with handles to its upstream
    and downstream actors in the same pipeline, and ``ray.get`` is called on
    the collected ``init`` results.

    :raises ProgrammingError: If a process class does not have concrete
        infrastructure and ``infrastructure_class`` is either not set or is
        not a subclass of ``ApplicationWithConcreteInfrastructure``.
    """
    # Check we have the infrastructure classes we need. The registered
    # values are classes, not instances, so issubclass() is the right test
    # (isinstance() on a class object is always False here).
    for process_class in self.system.process_classes.values():
        if not issubclass(process_class, ApplicationWithConcreteInfrastructure):
            if not self.infrastructure_class:
                raise ProgrammingError("infrastructure_class is not set")
            elif not issubclass(
                self.infrastructure_class, ApplicationWithConcreteInfrastructure
            ):
                raise ProgrammingError(
                    "infrastructure_class is not a subclass of {}".format(
                        ApplicationWithConcreteInfrastructure
                    )
                )

    # Get the DB_URI.
    # Todo: Support different URI for different application classes.
    env_vars = {}
    db_uri = self.db_uri or os.environ.get("DB_URI")
    if db_uri is not None:
        env_vars["DB_URI"] = db_uri

    # Start processes: one actor per process class per pipeline.
    for pipeline_id in self.pipeline_ids:
        for process_name, process_class in self.system.process_classes.items():
            ray_process_id = RayProcess.remote(
                application_process_class=process_class,
                infrastructure_class=self.infrastructure_class,
                env_vars=env_vars,
                poll_interval=self.poll_interval,
                pipeline_id=pipeline_id,
                setup_tables=self.setup_tables,
            )
            self.ray_processes[(process_name, pipeline_id)] = ray_process_id

    # Initialise each actor only after all of them exist, so that every
    # upstream/downstream handle can be looked up.
    init_ids = []
    for key, ray_process in self.ray_processes.items():
        process_name, pipeline_id = key
        upstream_names = self.system.upstream_names[process_name]
        downstream_names = self.system.downstream_names[process_name]
        downstream_processes = {
            name: self.ray_processes[(name, pipeline_id)]
            for name in downstream_names
        }
        upstream_processes = {
            name: self.ray_processes[(name, pipeline_id)]
            for name in upstream_names
        }
        init_ids.append(
            ray_process.init.remote(upstream_processes, downstream_processes)
        )
    ray.get(init_ids)
def loop_on_prompts(self) -> None:
    """
    Runs the process once, then keeps handling prompts taken from the inbox.

    A ``PromptToQuit`` closes the process and ends the loop, a
    ``PromptToPull`` is passed to ``run_process()``, and a timeout on the
    inbox triggers a plain ``run_process()`` call (i.e. polling).

    :raises ProgrammingError: If an item of any other type is received.
    """
    # Run once, in case prompts were missed.
    self.run_process()

    # Loop on getting prompts.
    while True:
        try:
            # Todo: Make the poll interval gradually increase if there are only
            # timeouts?
            prompt = self.inbox.get(timeout=self.poll_interval)
            self.inbox.task_done()

            if isinstance(prompt, PromptToQuit):
                self.process.close()
                return

            if not isinstance(prompt, PromptToPull):
                raise ProgrammingError("Unsupported prompt: {}".format(prompt))

            self.run_process(prompt)
        except Empty:
            # Basically, we're polling after a timeout.
            self.run_process()
def follow(
    self, upstream_application_name: str, notification_log: AbstractNotificationLog
) -> None:
    """
    Makes this process application follow an upstream notification log.

    A reader for the given log is constructed and stored in ``self.readers``
    under the upstream application's name.

    :param upstream_application_name: Name of the upstream application.
    :param notification_log: Notification log that will be processed.
    :raises ProgrammingError: If the upstream name is this application's own
        name while ``apply_policy_to_generated_events`` is true.
    """
    follows_itself = upstream_application_name == self.name
    if follows_itself and self.apply_policy_to_generated_events:
        raise ProgrammingError(
            "Process application not allowed to follow itself because "
            "its 'apply_policy_to_generated_events' attribute is True."
        )

    # Create a reader and register it under the upstream name.
    self.readers[upstream_application_name] = self.notification_log_reader_class(
        notification_log,
        use_direct_query_if_available=self.use_direct_query_if_available,
    )
def bind(
    self: TSystem, infrastructure_class: Type[ApplicationWithConcreteInfrastructure]
) -> TSystem:
    """
    Returns a copy of this system that has the given infrastructure class.

    The receiver is left untouched: a new object of the same type is created
    without calling ``__init__``, given a deep copy of this object's
    attributes, and then has its ``infrastructure_class`` set.

    :param infrastructure_class: Infrastructure class for the new system.
    :return: System object that has an infrastructure class.
    :rtype: System
    :raises ProgrammingError: If this system already has an infrastructure
        class.
    """
    # A system can only be bound once.
    if self.infrastructure_class:
        raise ProgrammingError("System already has an infrastructure class")

    # Clone the system object, and set the infrastructure class.
    system_type = type(self)
    bound = system_type.__new__(system_type)
    copied_state = deepcopy(self.__dict__)
    bound.__dict__.update(dict(copied_state))
    bound.__dict__.update(infrastructure_class=infrastructure_class)
    return bound
def __init__(
    self, always_check_expected_version=False, always_write_entity_version=False
):
    """
    Base class for a persistent collection of stored events.

    :param always_check_expected_version: Stored as an attribute of the same
        name.
    :param always_write_entity_version: Stored as an attribute of the same
        name.
    :raises ProgrammingError: If versions are to be checked but not written.
    """
    self.always_check_expected_version = always_check_expected_version
    self.always_write_entity_version = always_write_entity_version

    # Checking versions only makes sense if they are also written.
    if self.always_check_expected_version:
        if not self.always_write_entity_version:
            raise ProgrammingError(
                "If versions are checked, they must also be written."
            )
def construct_app(
    self,
    process_class: Type[TProcessApplication],
    infrastructure_class: Optional[
        Type[ApplicationWithConcreteInfrastructure]
    ] = None,
    **kwargs: Any,
) -> TProcessApplication:
    """
    Constructs process application from given ``process_class``.

    :param process_class: Process application class to construct.
    :param infrastructure_class: Infrastructure class to mix in when the
        process class has no concrete infrastructure. Defaults to this
        object's ``infrastructure_class``, or ``PopoApplication``.
    :param kwargs: Passed to the process class constructor. ``session`` and
        ``setup_table`` are filled in from this object when not given.
    :return: The constructed process application.
    :raises ProgrammingError: If the infrastructure class to mix in is not a
        subclass of ``ApplicationWithConcreteInfrastructure``.
    """
    # If process class isn't already an infrastructure class, then
    # subclass the process class with concrete infrastructure.
    if not issubclass(process_class, ApplicationWithConcreteInfrastructure):

        # Default to PopoApplication infrastructure.
        if infrastructure_class is None:
            infrastructure_class = self.infrastructure_class or PopoApplication

        # Assert that we now have an application with concrete infrastructure.
        if not issubclass(
            infrastructure_class, ApplicationWithConcreteInfrastructure
        ):
            raise ProgrammingError(
                "Given infrastructure_class {} is not subclass of {}"
                "".format(
                    infrastructure_class, ApplicationWithConcreteInfrastructure
                )
            )

        # Subclass the process application class with the infrastructure class.
        process_class = process_class.mixin(infrastructure_class)

    assert issubclass(process_class, ApplicationWithConcreteInfrastructure)

    # Set 'session' and 'setup_table' in kwargs.
    kwargs = dict(kwargs)
    if "session" not in kwargs and process_class.is_constructed_with_session:
        kwargs["session"] = self.session or self.shared_session
    # The keyword written below is 'setup_table', so that is the key that
    # must be checked to avoid clobbering a caller-supplied value. The
    # 'setup_tables' spelling is still honoured as before.
    if (
        self.setup_tables
        and "setup_table" not in kwargs
        and "setup_tables" not in kwargs
    ):
        kwargs["setup_table"] = self.setup_tables

    # Construct the process application.
    app = process_class(**kwargs)

    # Catch the session, so it can be shared.
    if self.session is None and self.shared_session is None:
        if process_class.is_constructed_with_session and self.is_session_shared:
            self.shared_session = app.session

    assert isinstance(app, ProcessApplication), app
    return app
def _raise_on_missing_infrastructure(self, what_is_missing):
    """
    Raises ProgrammingError saying this application lacks something.

    When the application is not an ``ApplicationWithConcreteInfrastructure``
    the message also says so and suggests how to obtain concrete
    infrastructure.

    :param what_is_missing: Description of the missing thing, interpolated
        into the error message.
    :raises ProgrammingError: Always.
    """
    # The first clause is left unterminated so that the optional second
    # clause reads as one sentence (rather than "... a X. and is not ...").
    msg = "Application class %s does not have a %s" % (
        type(self).__name__,
        what_is_missing,
    )
    if not isinstance(self, ApplicationWithConcreteInfrastructure):
        msg += (
            " and is not an ApplicationWithConcreteInfrastructure."
            " Try using or inheriting from or mixin() an application"
            " class with concrete infrastructure such as SQLAlchemyApplication"
            " or DjangoApplication or AxonApplication."
        )
    else:
        msg += "."
    raise ProgrammingError(msg)
def delete_record(self, record):
    """
    Permanently removes record from table.

    The deletion is committed on success. On any error the session is rolled
    back and the error is re-raised wrapped in a ProgrammingError. The
    session is closed in either case.

    :param record: Record object to delete via the session.
    :raises ProgrammingError: If deleting or committing fails.
    """
    try:
        self.session.delete(record)
        self.session.commit()
    except Exception as error:
        # Undo the pending deletion before reporting the failure.
        self.session.rollback()
        raise ProgrammingError(error)
    finally:
        self.session.close()
def __enter__(self: TSystemRunner) -> TSystemRunner:
    """
    Supports usage of a system runner as a context manager.

    Registers a weak reference to this runner on the system, starts the
    runner, and returns it.

    :raises ProgrammingError: If the system already refers to a live runner.
    """
    assert isinstance(self, AbstractSystemRunner)  # For PyCharm navigation.

    # The system holds a weak reference; a dead reference counts as "not
    # running".
    if self.system.runner is not None and self.system.runner() is not None:
        raise ProgrammingError(
            "System is already running: {}".format(self.system.runner)
        )
    self.system.runner = weakref.ref(self)

    self.start()
    return self
def follow(self, upstream_application_name: str,
           notification_log: AbstractNotificationLog) -> None:
    """
    Starts following the notification log of an upstream application.

    :param upstream_application_name: Key under which the new reader is
        stored in ``self.readers``.
    :param notification_log: Log handed to the reader class.
    :raises ProgrammingError: If asked to follow itself while the
        ``apply_policy_to_generated_events`` attribute is true.
    """
    if upstream_application_name == self.name:
        if self.apply_policy_to_generated_events:
            raise ProgrammingError(
                "Process application not allowed to follow itself because "
                "its 'apply_policy_to_generated_events' attribute is True.")

    # Create a reader.
    reader_class = self.notification_log_reader_class
    log_reader = reader_class(
        notification_log,
        use_direct_query_if_available=self.use_direct_query_if_available,
    )
    self.readers[upstream_application_name] = log_reader
def write_records(
    self,
    records: Iterable[Any],
    tracking_kwargs: Optional[TrackingKwargs] = None,
    orm_objs_pending_save: Optional[Sequence[Any]] = None,
    orm_objs_pending_delete: Optional[Sequence[Any]] = None,
) -> None:
    """
    Inserts the given records and, optionally, a tracking record.

    Everything happens while holding the write lock obtained from
    ``self._rw_lock``. The two ``orm_objs_pending_*`` arguments are accepted
    but not used by this implementation.

    :param records: Records to insert with ``_insert_record()``.
    :param tracking_kwargs: If given, must provide the keys
        ``upstream_application_name``, ``application_name`` and
        ``notification_id``.
    :raises ProgrammingError: If only some of the records have a
        notification ID.
    :raises RecordConflictError: If the tracking record already exists.
    """
    with self._rw_lock.gen_wlock():
        # Write event and notification records.
        if self.notification_id_name:
            records = list(records)
            distinct_ids = {
                getattr(item, self.notification_id_name) for item in records
            }
            # Either all or none of the records may have an ID.
            if None in distinct_ids and len(distinct_ids) > 1:
                raise ProgrammingError("Only some records have IDs")

        for item in records:
            self._insert_record(item)

        if not tracking_kwargs:
            return

        # Write a tracking record.
        upstream_application_name = tracking_kwargs["upstream_application_name"]
        application_name = tracking_kwargs["application_name"]
        notification_id = tracking_kwargs["notification_id"]
        assert application_name == self.application_name, (
            application_name,
            self.application_name,
        )

        # Tracking records are kept per application, then per upstream.
        try:
            by_upstream = self._all_tracking_records[application_name]
        except KeyError:
            by_upstream = {}
            self._all_tracking_records[self.application_name] = by_upstream

        try:
            tracked_ids = by_upstream[upstream_application_name]
        except KeyError:
            tracked_ids = set()
            by_upstream[upstream_application_name] = tracked_ids

        if notification_id in tracked_ids:
            raise RecordConflictError(
                (application_name, upstream_application_name, notification_id)
            )
        tracked_ids.add(notification_id)
def __enter__(self) -> "AbstractSystemRunner":
    """
    Supports running a system object directly as a context manager.

    The system is run with the SingleThreadedRunner.

    :return: The started runner.
    :raises ProgrammingError: If the system already refers to a live runner.
    """
    from eventsourcing.system.runner import SingleThreadedRunner

    # self.runner holds a weak reference, which is truthy even after its
    # referent has been collected, so it must be dereferenced to find out
    # whether a runner is actually still alive.
    if self.runner is not None and self.runner() is not None:
        raise ProgrammingError("System is already running: {}".format(self.runner))

    runner = SingleThreadedRunner(
        system=self,
        use_direct_query_if_available=self.use_direct_query_if_available,
    )
    runner.start()
    self.runner = weakref.ref(runner)
    return runner
def start(self) -> None:
    """
    Constructs the system's process applications and connects them.

    Each process is made to follow the notification log of each of its
    upstream processes, and ``handle_prompt`` is subscribed for prompts to
    pull.

    :raises ProgrammingError: If processes have already been constructed.
    """
    if self.processes:
        raise ProgrammingError("Already running")

    # Construct the processes.
    for process_class in self.system.process_classes.values():
        self._construct_app_by_class(process_class)

    # Tell each process which other processes to follow.
    for follower_name, leader_names in self.system.upstream_names.items():
        follower = self.processes[follower_name]
        for leader_name in leader_names:
            leader_log = self.processes[leader_name].notification_log
            follower.follow(leader_name, leader_log)

    # Do something to propagate prompts.
    subscribe(predicate=is_prompt_to_pull, handler=self.handle_prompt)
def attribute(getter):
    """
    Method decorator that turns the decorated function into a property.

    The property reads the instance attribute named after the function with
    a leading underscore (``None`` if it is not set), and assignments are
    routed through the instance method ``__change_attribute__()``. The
    property's docstring is taken from the decorated function.

    :param getter: The function being decorated.
    :raises ProgrammingError: If ``getter`` is not a function.
    """
    if not isfunction(getter):
        raise ProgrammingError("Expected a function, got: {}".format(repr(getter)))

    def read(self):
        # Unset attributes read as None rather than raising.
        return getattr(self, '_' + getter.__name__, None)

    def write(self, value):
        self.__change_attribute__(name='_' + getter.__name__, value=value)

    return property(fget=read, fset=write, doc=getter.__doc__)
def __init__(self, system: System, infrastructure_class=None, setup_tables=False):
    """
    Initialises a runner for the given system.

    :param system: System whose process classes will be run.
    :param infrastructure_class: Falls back to the system's
        ``infrastructure_class`` when not given.
    :param setup_tables: Stored as ``self.setup_tables``.
    :raises ProgrammingError: If some process class lacks concrete
        infrastructure and no concrete infrastructure class is available.
    """
    self.system = system
    self.infrastructure_class = (
        infrastructure_class or self.system.infrastructure_class
    )

    # Check that a concrete infrastructure class is involved.
    every_class_is_concrete = all(
        issubclass(cls, ApplicationWithConcreteInfrastructure)
        for cls in self.system.process_classes.values()
    )
    if not every_class_is_concrete:
        chosen = self.infrastructure_class
        if chosen is None or not issubclass(
            chosen, ApplicationWithConcreteInfrastructure
        ):
            raise ProgrammingError(
                "System runner needs a concrete application infrastructure class"
            )

    self.setup_tables = setup_tables
    self.processes = {}
def construct_app(self, process_class, infrastructure_class=None, **kwargs):
    """
    Constructs process application from given ``process_class``.

    :param process_class: Process application class to construct.
    :param infrastructure_class: Infrastructure class to mix in when the
        process class has no concrete infrastructure. Defaults to this
        object's ``infrastructure_class``, or ``PopoApplication``.
    :param kwargs: Passed to the process class constructor. ``session`` and
        ``setup_table`` are filled in from this object when not given.
    :return: The constructed process application.
    :raises ProgrammingError: If the infrastructure class to mix in is not a
        subclass of ``ApplicationWithConcreteInfrastructure``.
    """
    kwargs = dict(kwargs)
    if 'session' not in kwargs and process_class.is_constructed_with_session:
        kwargs['session'] = self.session or self.shared_session
    # The keyword written below is 'setup_table', so that is the key that
    # must be checked to avoid clobbering a caller-supplied value. The
    # 'setup_tables' spelling is still honoured as before.
    if (
        self.setup_tables
        and 'setup_table' not in kwargs
        and 'setup_tables' not in kwargs
    ):
        kwargs['setup_table'] = self.setup_tables

    # process_class is a class, not an instance, so issubclass() is the
    # right test; isinstance() would always be False and would mix
    # infrastructure into classes that already have it.
    if not issubclass(process_class, ApplicationWithConcreteInfrastructure):

        # If process class isn't already an infrastructure class, then
        # use given arg, or attribute of this object, or PopoApplication.
        if infrastructure_class is None:
            infrastructure_class = self.infrastructure_class or PopoApplication

        if not issubclass(infrastructure_class, ApplicationWithConcreteInfrastructure):
            raise ProgrammingError(
                'Given infrastructure_class {} is not subclass of {}'
                ''.format(infrastructure_class, ApplicationWithConcreteInfrastructure))

        # Subclass the process application class with the infrastructure class.
        process_class = process_class.mixin(infrastructure_class)

    # Construct the process application.
    process = process_class(**kwargs)

    # Catch the session, so it can be shared.
    if self.session is None:
        if process_class.is_constructed_with_session and self.is_session_shared:
            if self.shared_session is None:
                self.shared_session = process.session

    return process
def bind(self, infrastructure_class):
    """
    Returns a copy of this system that has the given infrastructure class.

    A new object of the same type is created without calling ``__init__``
    and populated with a deep copy of this object's attributes; the
    receiver itself is not modified.

    :param infrastructure_class: Infrastructure class for the new system.
    :return: System object that has an infrastructure class.
    :rtype: System
    :raises ProgrammingError: If this system already has an infrastructure
        class.
    """
    # Check system doesn't already have an infrastructure class.
    if self.infrastructure_class:
        raise ProgrammingError(
            'System already has an infrastructure class')

    # Clone the system object, and set the infrastructure class.
    bound = object.__new__(type(self))
    state = deepcopy(self.__dict__)
    state['infrastructure_class'] = infrastructure_class
    vars(bound).update(state)
    return bound
def mutableproperty(getter):
    """
    Method decorator that turns the decorated function into a property.

    The property reads the instance attribute named after the function with
    a leading underscore, and assignments are routed through the instance
    method ``_change_attribute()``. Both accessors assert that the instance
    is an ``EventSourcedEntity``.

    :param getter: The function being decorated.
    :raises ProgrammingError: If ``getter`` is not a function.
    """
    if not isfunction(getter):
        raise ProgrammingError("Expected a function, got: {}".format(
            repr(getter)))

    # NB: the accessors deliberately carry no docstrings, since property()
    # would otherwise adopt the getter's docstring as its own.
    def read(self):
        assert isinstance(self, EventSourcedEntity), type(self)
        return getattr(self, '_' + getter.__name__)

    def write(self, value):
        assert isinstance(self, EventSourcedEntity), type(self)
        self._change_attribute(name='_' + getter.__name__, value=value)

    return property(fget=read, fset=write)
def init(self, upstream_processes: dict, downstream_processes: dict) -> None:
    """
    Initialise with actor handles for upstream and downstream processes.

    Need to initialise after construction so that all handles exist.

    :param upstream_processes: Handles keyed by upstream process name; the
        process application is made to follow each of them.
    :param downstream_processes: Handles keyed by downstream process name.
    :raises ProgrammingError: If the process class has no concrete
        infrastructure and ``infrastructure_class`` is not set.
    """
    self.upstream_processes = upstream_processes
    self.downstream_processes = downstream_processes

    # Subscribe to broadcast prompts published by the process application.
    subscribe(handler=self._enqueue_prompt_to_pull, predicate=is_prompt_to_pull)

    # Construct process application object. The attribute holds a class,
    # not an instance, so issubclass() is the right test; with isinstance()
    # a class that already has concrete infrastructure would be mixed in
    # again (or rejected when no infrastructure class is set).
    process_class = self.application_process_class
    if not issubclass(process_class, ApplicationWithConcreteInfrastructure):
        if self.infrastructure_class:
            process_class = process_class.mixin(self.infrastructure_class)
        else:
            raise ProgrammingError("infrastructure_class is not set")

    def construct_process():
        return process_class(
            pipeline_id=self.pipeline_id, setup_table=self.setup_tables
        )

    self.process = self.do_db_job(construct_process, (), {})
    assert isinstance(self.process, ProcessApplication), self.process

    for upstream_name, ray_notification_log in self.upstream_processes.items():
        # Make the process follow the upstream notification log.
        self.process.follow(upstream_name, ray_notification_log)

    self._reset_positions()
    self.positions_initialised.set()
def write_records(
    self,
    records: Iterable[Any],
    tracking_kwargs: Optional[TrackingKwargs] = None,
    orm_objs_pending_save: Optional[Sequence[Any]] = None,
    orm_objs_pending_delete: Optional[Sequence[Any]] = None,
) -> None:
    """
    Writes records, an optional tracking record, and pending ORM changes
    inside a single ``transaction.atomic`` block.

    :param records: Records to insert; materialised as a list if needed.
    :param tracking_kwargs: If given, values for the tracking record, looked
        up by ``self.tracking_record_field_names``.
    :param orm_objs_pending_save: Objects on which ``save()`` is called.
    :param orm_objs_pending_delete: Objects on which ``delete()`` is called.
    :raises ProgrammingError: If only some records have notification IDs,
        if an ID is required but missing, or if an ID is not an int.

    A ``django.db.IntegrityError`` is passed to
    ``self.raise_record_integrity_error()``.
    """
    # The records are iterated more than once below.
    if not isinstance(records, list):
        records = list(records)

    # Prepare tracking params.
    if tracking_kwargs:
        tracking_params = [
            tracking_kwargs[c] for c in self.tracking_record_field_names
        ]
    else:
        tracking_params = None

    use_insert_select_max_statement = False
    event_insert_statement = self.insert_values
    if self.notification_id_name:
        # This is a bit complicated, but basically the idea
        # is to support two alternatives:
        #  - either use the "insert select max" statement
        #  - or use the "insert values" statement
        # The "insert values" statement depends on the
        # notification IDs being provided by the application
        # and the "insert select max" creates these IDs in
        # the query.
        # The "insert values" statement provides the opportunity
        # to have some notification IDs being null, but the
        # "insert select max" statement doesn't.
        # Therefore, using "insert select max" should only be
        # used when all the given records have null value
        # for the notification IDs. And so the alternative
        # usage needs to provide true values for all the
        # notification IDs. These true values can involve
        # a reserved "event-not-notifiable" value which
        # indicates there shouldn't be a notification ID
        # for this record (so that it doesn't appear in the
        # "get notifications" query).
        all_ids = set(
            (getattr(r, self.notification_id_name) for r in records))
        if None in all_ids:
            if len(all_ids) > 1:
                # Either all or zero records must have IDs.
                raise ProgrammingError("Only some records have IDs")

            elif self.contiguous_record_ids:
                # Do an "insert select max" from existing.
                use_insert_select_max_statement = True
                event_insert_statement = self.insert_select_max

            elif hasattr(self.record_class, "application_name"):
                # Can't allow auto-incrementing ID if table has field
                # application_name. We need values and don't have them.
                raise ProgrammingError("record ID not set when required")

    if self.contiguous_record_ids:
        # Build one positional parameter list per record for raw SQL.
        all_event_record_params = []
        for record in records:
            # Get values from record obj.
            # List of params, because dict doesn't work with Django
            # and SQLite.
            params = []

            for col_name in self.field_names:
                col_value = getattr(record, col_name)
                meta = self.record_class._meta  # type: ignore
                col_type = meta.get_field(col_name)

                # Prepare value for database.
                param = col_type.get_db_prep_value(col_value, connection)
                params.append(param)

            # Notification logs fields, to be inserted with event
            # fields.
            index_of_pipeline_id_param = None
            if hasattr(self.record_class, "application_name"):
                params.append(self.application_name)
            if hasattr(self.record_class, "pipeline_id"):
                params.append(self.pipeline_id)
                # Remembered so the value can be nulled out below.
                index_of_pipeline_id_param = len(params) - 1
            if hasattr(record, "causal_dependencies"):
                params.append(record.causal_dependencies)

            if use_insert_select_max_statement:
                # Where clause fields.
                if hasattr(self.record_class, "application_name"):
                    params.append(self.application_name)
                if hasattr(self.record_class, "pipeline_id"):
                    params.append(self.pipeline_id)
            elif self.notification_id_name:
                if hasattr(self.record_class, self.notification_id_name):
                    notification_id = getattr(record, self.notification_id_name)
                    if notification_id == EVENT_NOT_NOTIFIABLE:
                        # Non-notifiable: null ID and null pipeline ID.
                        # NOTE(review): index_of_pipeline_id_param is still
                        # None when record_class has no 'pipeline_id'
                        # attribute, in which case this assignment would
                        # raise TypeError - confirm that cannot happen.
                        notification_id = None
                        params[index_of_pipeline_id_param] = None
                    elif notification_id is not None:
                        if not isinstance(notification_id, int):
                            raise ProgrammingError(
                                "%s must be an %s not %s: %s" % (
                                    self.notification_id_name,
                                    int,
                                    type(notification_id),
                                    record.__dict__,
                                ))
                    params.append(notification_id)

            all_event_record_params.append(params)
    else:
        # Signals below that records are saved through the ORM instead.
        all_event_record_params = None

    try:
        with transaction.atomic(
                self.record_class.objects.db):  # type: ignore
            with connection.cursor() as cursor:
                # Insert tracking record.
                if tracking_params is not None:
                    cursor.execute(self.insert_tracking_record, tracking_params)

                if all_event_record_params is not None:
                    for event_record_params in all_event_record_params:
                        # Use cursor to execute event insert statement.
                        cursor.execute(event_insert_statement, event_record_params)
                else:
                    # This can only work for simple models, without application_name
                    # and pipeline_id, because it relies on the auto-incrementing
                    # ID.
                    # Todo: If it's faster, change to use an "insert_values" raw
                    #  query.
                    # Save record objects.
                    for record in records:
                        record.save()

                # Call 'save()' on each of the ORM objects pending save.
                if orm_objs_pending_save:
                    for orm_obj in orm_objs_pending_save:
                        orm_obj.save()

                # Call 'delete()' on each of the ORM objects pending delete.
                if orm_objs_pending_delete:
                    for orm_obj in orm_objs_pending_delete:
                        orm_obj.delete()

    except django.db.IntegrityError as e:
        self.raise_record_integrity_error(e)
def _insert_record(self, record: NamedTuple) -> None:
    """
    Stores one record in the in-memory sequence and notification structures.

    :param record: Record to store; its position, sequence ID and (if
        ``self.notification_id_name`` is set) notification ID are read with
        ``getattr``.
    :raises NotImplementedError: If the record's position is not an int.
    :raises ProgrammingError: If a given notification ID is not an int.
    :raises RecordConflictError: If the position is already taken in the
        sequence.
    :raises AssertionError: If, with notifications enabled, the position is
        not the next one in the sequence.
    """
    position = getattr(record, self.field_names.position)
    if not isinstance(position, int):
        raise NotImplementedError(
            "Popo record manager only supports sequencing with integers, "
            "but position was a {}".format(type(position)))

    # Validate the notification ID, unless it is the reserved
    # "not notifiable" value or not set at all.
    if self.notification_id_name:
        notification_id = getattr(record, self.notification_id_name)
        if notification_id != EVENT_NOT_NOTIFIABLE:
            if notification_id is not None:
                if not isinstance(notification_id, int):
                    raise ProgrammingError("%s must be an %s not %s: %s" % (
                        self.notification_id_name,
                        int,
                        type(notification_id),
                        record.__dict__,
                    ))

    # Find (or create) the records of this sequence, which are kept per
    # application name and then per sequence ID.
    sequence_id = getattr(record, self.field_names.sequence_id)
    try:
        application_records = self._all_sequence_records[
            self.application_name]
    except KeyError:
        # First record for this application.
        sequence_records: Dict[int, NamedTuple] = {}
        application_records = {sequence_id: sequence_records}
        self._all_sequence_records[
            self.application_name] = application_records
        self._all_sequence_max[self.application_name] = {}
    else:
        try:
            sequence_records = application_records[sequence_id]
        except KeyError:
            # First record for this sequence.
            sequence_records = {}
            application_records[sequence_id] = sequence_records

    if position in sequence_records:
        raise RecordConflictError(position, len(sequence_records))

    if self.notification_id_name:
        # Just make sure we aren't making a gap in the sequence.
        if sequence_records:
            max_position = self._all_sequence_max[
                self.application_name][sequence_id]
            next_position = max_position + 1
        else:
            next_position = 0
        if position != next_position:
            raise AssertionError(
                "Next position for sequence {} is {}, not {}".format(
                    sequence_id, next_position, position))

    # Store the record and remember the latest position of the sequence.
    sequence_records[position] = record
    self._all_sequence_max[self.application_name][sequence_id] = position

    # Write a notification record.
    if self.notification_id_name:
        try:
            notification_records = self._all_notification_records[
                self.application_name]
        except KeyError:
            notification_records = {}
            self._all_notification_records[
                self.application_name] = notification_records

        if self.notification_id_name:
            notification_id = getattr(record, self.notification_id_name)
            if notification_id == EVENT_NOT_NOTIFIABLE:
                # Not notifiable: clear the ID and write no notification.
                setattr(record, self.notification_id_name, None)
            else:
                if notification_id is None:
                    # No ID given: use the one after the current maximum.
                    notification_id = (self._get_max_record_id() or 0) + 1
                    setattr(record, self.notification_id_name, notification_id)
                notification_records[notification_id] = {
                    "notification_id": notification_id,
                    "sequenced_item": record,
                }
                self._all_notification_max[
                    self.application_name] = notification_id
def write_records(
    self,
    records: Iterable[Any],
    tracking_kwargs: Optional[TrackingKwargs] = None,
    orm_objs_pending_save: Optional[Sequence[Any]] = None,
    orm_objs_pending_delete: Optional[Sequence[Any]] = None,
) -> None:
    """
    Writes records, an optional tracking record, and pending ORM changes
    using ``self.session``.

    :param records: Records to insert; materialised as a list if needed.
    :param tracking_kwargs: If given, parameters for the tracking record
        insert statement.
    :param orm_objs_pending_save: Objects added to the session.
    :param orm_objs_pending_delete: Objects deleted through the session.
    :raises ProgrammingError: If only some records have notification IDs,
        if an ID is required but missing, or if an ID is not an int.

    ``sqlalchemy.exc.IntegrityError`` and ``sqlalchemy.exc.DBAPIError`` are
    passed to ``self.raise_record_integrity_error()`` and
    ``self.raise_operational_error()`` respectively.
    """
    # Prepare tracking record statement.
    has_orm_objs = orm_objs_pending_delete or orm_objs_pending_save

    # Not using compiled statements because I'm not sure what
    # session.bind.begin() actually does. Seems ok but feeling
    # unsure. And the marginal performance improvement seems
    # not to be worth the risk of messing up transactions.
    # Todo: Environment variable?
    is_complied_statements_enabled = False
    # With the flag above hard-coded to False, this is always falsy, so
    # only the session-based branch further down is taken.
    use_compiled_statements = is_complied_statements_enabled and not has_orm_objs

    if tracking_kwargs:
        if not use_compiled_statements:
            tracking_record_statement = self.insert_tracking_record
        else:
            tracking_record_statement = self.insert_tracking_record_compiled
    else:
        tracking_record_statement = None

    # Prepare stored event record statement and params.
    all_params = []
    event_record_statement = None
    # The records are iterated more than once below.
    if not isinstance(records, list):
        records = list(records)
    if records:
        # Prepare to insert event and notification records.
        if not use_compiled_statements:
            event_record_statement = self.insert_values
        else:
            event_record_statement = self.insert_values_compiled

        if self.notification_id_name:
            # This is a bit complicated, but basically the idea
            # is to support two alternatives:
            #  - either use the "insert select max" statement
            #  - or use the "insert values" statement
            # The "insert values" statement depends on the
            # notification IDs being provided by the application
            # and the "insert select max" creates these IDs in
            # the query.
            # The "insert values" statement provides the opportunity
            # to have some notification IDs being null, but the
            # "insert select max" statement doesn't.
            # Therefore, using "insert select max" should only be
            # used when all the given records have null value
            # for the notification IDs. And so the alternative
            # usage needs to provide true values for all the
            # notification IDs. These true values can involve
            # a reserved "event-not-notifiable" value which
            # indicates there shouldn't be a notification ID
            # for this record (so that it doesn't appear in the
            # "get notifications" query).
            all_ids = set(
                (getattr(r, self.notification_id_name) for r in records))
            if None in all_ids:
                if len(all_ids) > 1:
                    # Either all or zero records must have IDs.
                    raise ProgrammingError("Only some records have IDs")

                elif self.contiguous_record_ids:
                    # Do an "insert select max" from existing.
                    if not use_compiled_statements:
                        event_record_statement = self.insert_select_max
                    else:
                        event_record_statement = self.insert_select_max_compiled

                elif hasattr(self.record_class, "application_name"):
                    # Can't allow auto-incrementing ID if table has field
                    # application_name. We need values and don't have them.
                    raise ProgrammingError(
                        "record ID not set when required")

        for record in records:
            # Params for stored item itself (e.g. event).
            params = {
                name: getattr(record, name)
                for name in self.field_names
            }

            # Params for application partition (bounded context).
            if hasattr(self.record_class, "application_name"):
                params["application_name"] = self.application_name

            # Params for notification log.
            if self.notification_id_name:
                notification_id = getattr(record, self.notification_id_name)
                if notification_id == EVENT_NOT_NOTIFIABLE:
                    params[self.notification_id_name] = None
                else:
                    if notification_id is not None:
                        if not isinstance(notification_id, int):
                            raise ProgrammingError(
                                "%s must be an %s not %s: %s" % (
                                    self.notification_id_name,
                                    int,
                                    type(notification_id),
                                    record.__dict__,
                                ))
                    params[self.notification_id_name] = notification_id

                if hasattr(self.record_class, "pipeline_id"):
                    # Non-notifiable records get a null pipeline ID too.
                    if notification_id == EVENT_NOT_NOTIFIABLE:
                        params["pipeline_id"] = None
                    else:
                        params["pipeline_id"] = self.pipeline_id

            if hasattr(record, "causal_dependencies"):
                params["causal_dependencies"] = record.causal_dependencies

            all_params.append(params)

    if not use_compiled_statements:
        s = self.session
        try:
            # Tracks whether anything was sent to the session, so that the
            # commit can be skipped when there is nothing to write.
            nothing_to_commit = True

            if tracking_kwargs:
                s.execute(tracking_record_statement, tracking_kwargs)
                nothing_to_commit = False

            # Commit custom ORM objects.
            if orm_objs_pending_save:
                for orm_obj in orm_objs_pending_save:
                    s.add(orm_obj)
                nothing_to_commit = False

            if orm_objs_pending_delete:
                for orm_obj in orm_objs_pending_delete:
                    s.delete(orm_obj)
                nothing_to_commit = False

            # Bulk insert event records.
            if all_params:
                s.execute(event_record_statement, all_params)
                nothing_to_commit = False

            if nothing_to_commit:
                # The 'finally' clause below still closes the session.
                return

            s.commit()

        except sqlalchemy.exc.IntegrityError as e:
            s.rollback()
            self.raise_record_integrity_error(e)

        except sqlalchemy.exc.DBAPIError as e:
            s.rollback()
            self.raise_operational_error(e)

        except:
            # Roll back on anything else, then let it propagate unchanged.
            s.rollback()
            raise

        finally:
            s.close()

    else:
        try:
            with self.session.bind.begin() as connection:
                if tracking_kwargs:
                    # Insert tracking record.
                    connection.execute(tracking_record_statement, **tracking_kwargs)

                if all_params:
                    # Bulk insert event records.
                    connection.execute(event_record_statement, all_params)

        except sqlalchemy.exc.IntegrityError as e:
            self.raise_record_integrity_error(e)

        except sqlalchemy.exc.DBAPIError as e:
            self.raise_operational_error(e)
def write_records(self, records, tracking_kwargs=None, orm_objs=None):
    """
    Writes records, an optional tracking record, and custom ORM objects
    using ``self.session``, committing once at the end.

    Nothing is committed when there is nothing to write. The session is
    rolled back on any error and closed in every case.

    :param records: Iterable of records to insert (any iterable, including
        a generator; ``None`` is treated as empty).
    :param tracking_kwargs: If given, parameters for the tracking record
        insert statement.
    :param orm_objs: If given, objects to add to the session.
    :raises ProgrammingError: If only some records have notification IDs,
        or if an ID is required but missing.

    ``IntegrityError`` and ``DBAPIError`` are passed to
    ``self.raise_record_integrity_error()`` and
    ``self.raise_operational_error()`` respectively.
    """
    # Materialise once: the records are iterated twice below (to collect
    # the notification IDs, then to build the params), so a one-shot
    # iterable would otherwise be exhausted before anything is inserted.
    records = [] if records is None else list(records)

    all_params = []
    statement = None
    if records:
        # Prepare to insert event and notification records.
        statement = self.insert_values
        if self.notification_id_name:
            all_ids = {getattr(r, self.notification_id_name) for r in records}
            if None in all_ids:
                if len(all_ids) > 1:
                    # Either all or zero records must have IDs.
                    raise ProgrammingError("Only some records have IDs")

                elif self.contiguous_record_ids:
                    # Do an "insert select max" from existing.
                    statement = self.insert_select_max

                elif hasattr(self.record_class, 'application_name'):
                    # Can't allow auto-incrementing ID if table has field
                    # application_name. We need values and don't have them.
                    raise ProgrammingError(
                        "record ID not set when required")

        for record in records:
            # Params for stored item itself (e.g. event).
            params = {
                name: getattr(record, name)
                for name in self.field_names
            }

            # Params for application partition (bounded context).
            if hasattr(self.record_class, 'application_name'):
                params['application_name'] = self.application_name

            # Params for notification log.
            if self.notification_id_name:
                notification_id = getattr(record, self.notification_id_name)
                if notification_id == 'event-not-notifiable':
                    params[self.notification_id_name] = None
                else:
                    params[self.notification_id_name] = notification_id

                if hasattr(self.record_class, 'pipeline_id'):
                    # Non-notifiable records get a null pipeline ID too.
                    if notification_id == 'event-not-notifiable':
                        params['pipeline_id'] = None
                    else:
                        params['pipeline_id'] = self.pipeline_id

            if hasattr(record, 'causal_dependencies'):
                params['causal_dependencies'] = record.causal_dependencies

            all_params.append(params)

    try:
        nothing_to_commit = True

        # Commit custom ORM objects.
        if orm_objs:
            for orm_obj in orm_objs:
                self.session.add(orm_obj)
            nothing_to_commit = False

        # Insert tracking record.
        if tracking_kwargs:
            self.session.execute(self.insert_tracking_record, tracking_kwargs)
            nothing_to_commit = False

        # Bulk insert event records.
        if all_params:
            self.session.execute(statement, all_params)
            nothing_to_commit = False

        if nothing_to_commit:
            # The 'finally' clause below still closes the session.
            return

        self.session.commit()

    except IntegrityError as e:
        self.session.rollback()
        self.raise_record_integrity_error(e)

    except DBAPIError as e:
        self.session.rollback()
        self.raise_operational_error(e)

    except BaseException:
        # Roll back on anything else, then let it propagate unchanged.
        self.session.rollback()
        raise

    finally:
        self.session.close()
def delete_record(self, record):
    """
    Removes the given record by calling its ``delete()`` method.

    :param record: Instance of ``self.record_class``.
    :raises ProgrammingError: If deleting raises ``InvalidRequest``.
    """
    assert isinstance(record, self.record_class), type(record)
    try:
        record.delete()
    except InvalidRequest as error:
        raise ProgrammingError(error)
def run(self) -> None:
    """
    Constructs the process application, makes it follow the notification
    log of each upstream application, and then loops on prompts.

    Broadcast prompts are subscribed to for the duration of the loop,
    and unsubscribed from when the loop exits for any reason.

    :raises ProgrammingError: If the infrastructure needs to be mixed
        in but ``infrastructure_class`` is not set.
    """
    # Construct process application class.
    # NOTE(review): process_class appears to be a class (it is called,
    # and has mixin() called on it), so this isinstance() check looks
    # like it is always false. Confirm whether issubclass() was intended.
    process_class = self.application_process_class
    if not isinstance(process_class, ApplicationWithConcreteInfrastructure):
        if not self.infrastructure_class:
            raise ProgrammingError("infrastructure_class is not set")
        process_class = process_class.mixin(self.infrastructure_class)

    # Construct process application object.
    self.process: ProcessApplication = process_class(
        pipeline_id=self.pipeline_id, setup_table=self.setup_tables
    )

    # Follow upstream notification logs.
    for upstream_name in self.upstream_names:

        # Obtain a notification log object (local or remote) for the
        # upstream process.
        if upstream_name == self.process.name:
            # Upstream is this process's application,
            # so use own notification log.
            upstream_log = self.process.notification_log
        else:
            # For a different application, we need to construct a
            # notification log with a record manager that has the
            # upstream application ID. Currently assumes all applications
            # are using the same database and record manager class. If it
            # wasn't the same database, we would need to use a remote
            # notification log, and upstream would need to provide an API
            # from which we can pull. It's not unreasonable to have a
            # fixed number of application processes connecting to the
            # same database.
            own_record_manager = self.process.event_store.record_manager
            upstream_record_manager = own_record_manager.clone(
                application_name=upstream_name,
                # Todo: Check if setting pipeline_id is necessary (it's
                # the same?).
                pipeline_id=self.pipeline_id,
            )
            upstream_log = RecordManagerNotificationLog(
                record_manager=upstream_record_manager,
                section_size=self.process.notification_log_section_size,
            )
            # Todo: Support upstream partition IDs different from
            # self.pipeline_id?
            # Todo: Support combining partitions. Read from different
            # partitions but write to the same partition, could be one
            # os process that reads from many logs of the same upstream
            # app, or many processes each reading one partition with
            # contention writing to the same partition).
            # Todo: Support dividing partitions. Read from one but write
            # to many. Maybe one process per upstream partition,
            # round-robin to pick partition for write. Or have many
            # processes reading with each taking it in turn to skip
            # processing somehow.
            # Todo: Dividing partitions would allow a stream to flow at
            # the same rate through slower process applications.
            # Todo: Support merging results from "replicated state
            # machines" - could have a command logging process that takes
            # client commands and presents them in a notification log.
            # Then the system could be deployed in different places,
            # running independently, receiving the same commands, and
            # running the same processes. The command logging process
            # could be accompanied with a result logging process that
            # reads results from replicas as they are available. Not sure
            # what to do if replicas return different things. If one
            # replica goes down, then it could resume by pulling events
            # from another? Not sure what to do. External systems could
            # be modelled as commands.

        # Make the process follow the upstream notification log.
        self.process.follow(upstream_name, upstream_log)

    # Subscribe to broadcast prompts published by the process application.
    subscribe(handler=self.broadcast_prompt, predicate=is_prompt_to_pull)
    try:
        self.loop_on_prompts()
    finally:
        unsubscribe(handler=self.broadcast_prompt,
                    predicate=is_prompt_to_pull)
def init(self, upstream_processes: dict, downstream_processes: dict) -> None:
    """
    Initialise with actor handles for upstream and downstream processes.

    This needs to happen after construction, so that all the handles
    exist. It also constructs the process application (via
    ``do_db_job()``), makes it follow each upstream process, resets
    positions, and finally sets the ``positions_initialised`` event.

    :param upstream_processes: Handles for upstream processes, keyed by
        upstream name.
    :param downstream_processes: Handles for downstream processes.
    :raises ProgrammingError: If the infrastructure needs to be mixed
        in but ``infrastructure_class`` is not set.
    """
    self.upstream_processes = upstream_processes
    self.downstream_processes = downstream_processes

    # Subscribe to broadcast prompts published by the process application.
    subscribe(handler=self._enqueue_prompt_to_pull,
              predicate=is_prompt_to_pull)

    # Construct process application class.
    # NOTE(review): process_class appears to be a class, so this
    # isinstance() check looks like it is always false. Confirm whether
    # issubclass() was intended.
    process_class = self.application_process_class
    if not isinstance(process_class, ApplicationWithConcreteInfrastructure):
        if not self.infrastructure_class:
            raise ProgrammingError("infrastructure_class is not set")
        process_class = process_class.mixin(self.infrastructure_class)

    class MethodWrapper(object):
        # Calls the wrapped method, returning (rather than raising) any
        # EventSourcingError wrapped in an ExceptionWrapper.
        def __init__(self, method):
            self.method = method

        def __call__(self, *args, **kwargs):
            try:
                result = self.method(*args, **kwargs)
            except EventSourcingError as e:
                return ExceptionWrapper(e)
            return result

    class ProcessApplicationWrapper(object):
        # Proxies attribute access to the process application, wrapping
        # bound methods with MethodWrapper.
        def __init__(self, process_application):
            self.process_application = process_application

        def __getattr__(self, item):
            attribute = getattr(self.process_application, item)
            return MethodWrapper(attribute) if ismethod(attribute) else attribute

    def construct_process():
        return process_class(pipeline_id=self.pipeline_id,
                             setup_table=self.setup_tables)

    # Construct process application object.
    constructed = self.do_db_job(construct_process, (), {})
    assert isinstance(constructed, ProcessApplication), constructed
    self.process_wrapper = ProcessApplicationWrapper(constructed)
    self.process_application = constructed

    # Make the process follow each upstream notification log.
    for upstream_name, ray_notification_log in self.upstream_processes.items():
        self.process_application.follow(upstream_name, ray_notification_log)

    self._reset_positions()
    self.positions_initialised.set()
def start(self) -> None:
    """
    Starts an operating system process for each process application in
    each pipeline, connected by prompt inboxes and outboxes.

    Each process gets its own inbox queue. Each upstream process gets an
    outbox that refers to the inboxes of the processes following it.
    Finally, the process applications are also constructed in the local
    process.

    :raises ProgrammingError: If ``infrastructure_class`` is required
        but is not set, is a ``PopoApplication`` subclass, or is not a
        subclass of ``ApplicationWithConcreteInfrastructure``.
    """
    self.os_processes = []
    self.manager = Manager()

    if TYPE_CHECKING:
        self.inboxes: Dict[Tuple[int, str], Queue[Prompt]]
        self.outboxes: Dict[Tuple[int, str], PromptOutbox[Tuple[int, str]]]
    self.inboxes = {}
    self.outboxes = {}

    # Setup queues.
    for pipeline_id in self.pipeline_ids:
        for follower_name, leader_names in self.system.upstream_names.items():
            inbox_key = (pipeline_id, follower_name.lower())
            if inbox_key not in self.inboxes:
                self.inboxes[inbox_key] = self.manager.Queue()

            for leader_name in leader_names:
                outbox_key = (pipeline_id, leader_name.lower())
                if outbox_key not in self.outboxes:
                    self.outboxes[outbox_key] = PromptOutbox()

                # Register the follower's inbox with the leader's outbox.
                leader_outbox = self.outboxes[outbox_key]
                if inbox_key not in leader_outbox.downstream_inboxes:
                    leader_outbox.downstream_inboxes[inbox_key] = (
                        self.inboxes[inbox_key]
                    )

    # Check we have the infrastructure classes we need.
    # NOTE(review): the values look like classes, so this isinstance()
    # check looks like it never skips one. Confirm whether issubclass()
    # was intended.
    for process_class in self.system.process_classes.values():
        if isinstance(process_class, ApplicationWithConcreteInfrastructure):
            continue
        if not self.infrastructure_class:
            raise ProgrammingError("infrastructure_class is not set")
        if issubclass(self.infrastructure_class, PopoApplication):
            raise ProgrammingError(
                "Can't use %s with %s" % (type(self),
                                          self.infrastructure_class))
        if not issubclass(self.infrastructure_class,
                          ApplicationWithConcreteInfrastructure):
            raise ProgrammingError(
                "infrastructure_class is not a subclass of {}".format(
                    ApplicationWithConcreteInfrastructure))

    # Subscribe to broadcast prompts published by a process
    # application in the parent operating system process.
    subscribe(handler=self.broadcast_prompt, predicate=is_prompt_to_pull)

    # Start operating system process.
    already_slept_for_tables = False
    for pipeline_id in self.pipeline_ids:
        for process_name, upstream_names in self.system.upstream_names.items():
            queue_key = (pipeline_id, process_name.lower())
            os_process = OperatingSystemProcess(
                application_process_class=self.system.process_classes[
                    process_name],
                infrastructure_class=self.infrastructure_class,
                upstream_names=upstream_names,
                poll_interval=self.poll_interval,
                pipeline_id=pipeline_id,
                setup_tables=self.setup_tables,
                inbox=self.inboxes[queue_key],
                # There is no outbox if nothing follows this process.
                outbox=self.outboxes.get(queue_key),
            )
            os_process.daemon = True
            os_process.start()
            self.os_processes.append(os_process)

            if self.setup_tables and not already_slept_for_tables:
                # Avoid conflicts when creating tables. This sleeps once
                # only, after the first process has been started.
                sleep(self.sleep_for_setup_tables)
                already_slept_for_tables = True

    # Construct process applications in local process.
    for process_class in self.system.process_classes.values():
        self.get(process_class)