def __init__(
    self,
    db_conn,
    db: Database,
    instance_name: str,
    table: str,
    instance_column: str,
    id_column: str,
    sequence_name: str,
):
    """Initialise the generator state and load the stored positions.

    Args:
        db_conn: Connection handed to `_load_current_ids` to read the
            starting positions.
        db: Database handle, kept on the instance.
        instance_name: Name of this instance, kept on the instance.
        table: Table name forwarded to `_load_current_ids`.
        instance_column: Column name forwarded to `_load_current_ids`
            (presumably the column recording the writing instance --
            TODO confirm).
        id_column: Column name forwarded to `_load_current_ids`
            (presumably the column holding the stream ID -- TODO confirm).
        sequence_name: Name passed to `PostgresSequenceGenerator`.
    """
    self._instance_name = instance_name
    self._db = db

    # Some functions may be called from DB threads, so access to the
    # state below is guarded by a lock.
    self._lock = threading.Lock()

    loaded_positions = self._load_current_ids(
        db_conn, table, instance_column, id_column
    )
    self._current_positions = loaded_positions

    # Local IDs that are still being processed. Whenever this set is
    # non-empty, the current position should be below its smallest member.
    self._unfinished_ids = set()  # type: Set[int]

    sequence_gen = PostgresSequenceGenerator(sequence_name)
    self._sequence_gen = sequence_gen
def __init__(
    self,
    db_conn,
    db: DatabasePool,
    instance_name: str,
    table: str,
    instance_column: str,
    id_column: str,
    sequence_name: str,
    positive: bool = True,
):
    """Initialise the generator state and load the stored positions.

    Args:
        db_conn: Connection handed to `_load_current_ids` to read the
            starting positions.
        db: Database pool, kept on the instance.
        instance_name: Name of this instance, kept on the instance.
        table: Table name forwarded to `_load_current_ids`.
        instance_column: Column name forwarded to `_load_current_ids`
            (presumably the column recording the writing instance --
            TODO confirm).
        id_column: Column name forwarded to `_load_current_ids`
            (presumably the column holding the stream ID -- TODO confirm).
        sequence_name: Name passed to `PostgresSequenceGenerator`.
        positive: Whether returned IDs are positive. For a negative
            stream the IDs are still stored as positive numbers and are
            negated (via `_return_factor`) when returned.
    """
    self._instance_name = instance_name
    self._db = db
    self._positive = positive

    # Multiplier applied to IDs on the way out: +1 for a positive stream,
    # -1 for a negative one.
    if positive:
        self._return_factor = 1
    else:
        self._return_factor = -1

    # Some functions may be called from DB threads, so access to the
    # state below is guarded by a lock.
    self._lock = threading.Lock()

    # Even for a negative stream every ID is kept as a positive number
    # internally; negation only happens when IDs are returned.
    loaded_positions = self._load_current_ids(
        db_conn, table, instance_column, id_column
    )
    self._current_positions = loaded_positions

    # Local IDs that are still being processed. Whenever this set is
    # non-empty, the current position should be below its smallest member.
    self._unfinished_ids = set()  # type: Set[int]

    # Highest position for which everything earlier is known to have been
    # persisted. It is derived from a) the minimum across all instances,
    # and b) runs of persisted positions with no gaps (having seen
    # 5, 6, 7 lets us skip ahead to 7).
    #
    # Note: there is no guarantee that the sequence hands out gapless IDs;
    # a rolled-back transaction, for example, leaves a gap. So sometimes
    # the position cannot be advanced even though everything has been
    # persisted. Gaps should be relatively rare, though, so the book
    # keeping that lets us skip forward over gapless runs is still worth
    # doing.
    if self._current_positions:
        self._persisted_upto_position = min(self._current_positions.values())
    else:
        self._persisted_upto_position = 0
    self._known_persisted_positions = []  # type: List[int]

    sequence_gen = PostgresSequenceGenerator(sequence_name)
    self._sequence_gen = sequence_gen
def __init__(
    self,
    db_conn,
    db: DatabasePool,
    stream_name: str,
    instance_name: str,
    tables: List[Tuple[str, str, str]],
    sequence_name: str,
    writers: List[str],
    positive: bool = True,
):
    """Initialise the generator, check the sequence and load positions.

    Args:
        db_conn: Connection used for the sequence consistency checks and
            handed to `_load_current_ids`.
        db: Database pool, kept on the instance.
        stream_name: Name of the stream; kept on the instance and passed
            to the sequence consistency check.
        instance_name: Name of this instance, kept on the instance.
        tables: 3-tuples whose first element is a table name and whose
            last element is that table's ID column (the middle element is
            presumably the instance column -- TODO confirm). The whole
            list is forwarded to `_load_current_ids`.
        sequence_name: Name passed to `PostgresSequenceGenerator`.
        writers: Kept on the instance as `_writers` (presumably the names
            of the instances that write to this stream -- TODO confirm).
        positive: Whether returned IDs are positive. For a negative
            stream the IDs are still stored as positive numbers and are
            negated (via `_return_factor`) when returned.
    """
    self._db = db
    self._instance_name = instance_name
    self._stream_name = stream_name
    self._writers = writers
    self._positive = positive

    # Multiplier applied to IDs on the way out: +1 for a positive stream,
    # -1 for a negative one.
    if positive:
        self._return_factor = 1
    else:
        self._return_factor = -1

    # Some functions may be called from DB threads, so access to the
    # state below is guarded by a lock.
    self._lock = threading.Lock()

    # Even for a negative stream every ID is kept as a positive number
    # internally; negation only happens when IDs are returned.
    self._current_positions = {}  # type: Dict[str, int]

    # Local IDs that are still being processed. Whenever this set is
    # non-empty, the current position should be below its smallest member.
    self._unfinished_ids = set()  # type: Set[int]

    # Local IDs that have been processed but sit above the current
    # position because smaller IDs have not been persisted yet.
    self._finished_ids = set()  # type: Set[int]

    # Highest position for which everything earlier is known to have been
    # persisted. It is derived from a) the minimum across all instances,
    # and b) runs of persisted positions with no gaps (having seen
    # 5, 6, 7 lets us skip ahead to 7).
    #
    # Note: there is no guarantee that the sequence hands out gapless IDs;
    # a rolled-back transaction, for example, leaves a gap. So sometimes
    # the position cannot be advanced even though everything has been
    # persisted. Gaps should be relatively rare, though, so the book
    # keeping that lets us skip forward over gapless runs is still worth
    # doing.
    #
    # The fallback is 1 because a) the first generated stream ID will be
    # 2, and b) other parts of the code assume stream IDs are strictly
    # greater than 0.
    #
    # NOTE(review): `_current_positions` is still the empty dict assigned
    # above, so this always takes the fallback here; confirm that
    # `_load_current_ids` recomputes the value once positions are loaded.
    if self._current_positions:
        self._persisted_upto_position = min(self._current_positions.values())
    else:
        self._persisted_upto_position = 1
    self._known_persisted_positions = []  # type: List[int]

    self._sequence_gen = PostgresSequenceGenerator(sequence_name)

    # Verify for every table that the table and the sequence have not
    # diverged.
    for table_name, _unused, id_col in tables:
        self._sequence_gen.check_consistency(
            db_conn,
            table=table_name,
            id_column=id_col,
            stream_name=stream_name,
            positive=positive,
        )

    # This goes and fills out the above state from the database.
    self._load_current_ids(db_conn, tables)