Ejemplo n.º 1
0
    def __init__(
        self,
        db_conn,
        db: Database,
        instance_name: str,
        table: str,
        instance_column: str,
        id_column: str,
        sequence_name: str,
    ):
        self._db = db
        self._instance_name = instance_name

        # We lock as some functions may be called from DB threads.
        self._lock = threading.Lock()

        self._current_positions = self._load_current_ids(
            db_conn, table, instance_column, id_column
        )

        # Set of local IDs that we're still processing. The current position
        # should be less than the minimum of this set (if not empty).
        self._unfinished_ids = set()  # type: Set[int]

        self._sequence_gen = PostgresSequenceGenerator(sequence_name)
Ejemplo n.º 2
0
    def __init__(
        self,
        db_conn,
        db: DatabasePool,
        instance_name: str,
        table: str,
        instance_column: str,
        id_column: str,
        sequence_name: str,
        positive: bool = True,
    ):
        self._db = db
        self._instance_name = instance_name
        self._positive = positive
        self._return_factor = 1 if positive else -1

        # We lock as some functions may be called from DB threads.
        self._lock = threading.Lock()

        # Note: If we are a negative stream then we still store all the IDs as
        # positive to make life easier for us, and simply negate the IDs when we
        # return them.
        self._current_positions = self._load_current_ids(
            db_conn, table, instance_column, id_column)

        # Set of local IDs that we're still processing. The current position
        # should be less than the minimum of this set (if not empty).
        self._unfinished_ids = set()  # type: Set[int]

        # We track the max position where we know everything before has been
        # persisted. This is done by a) looking at the min across all instances
        # and b) noting that if we have seen a run of persisted positions
        # without gaps (e.g. 5, 6, 7) then we can skip forward (e.g. to 7).
        #
        # Note: There is no guarentee that the IDs generated by the sequence
        # will be gapless; gaps can form when e.g. a transaction was rolled
        # back. This means that sometimes we won't be able to skip forward the
        # position even though everything has been persisted. However, since
        # gaps should be relatively rare it's still worth doing the book keeping
        # that allows us to skip forwards when there are gapless runs of
        # positions.
        self._persisted_upto_position = (min(self._current_positions.values())
                                         if self._current_positions else 0)
        self._known_persisted_positions = []  # type: List[int]

        self._sequence_gen = PostgresSequenceGenerator(sequence_name)
Ejemplo n.º 3
0
    def __init__(
        self,
        db_conn,
        db: DatabasePool,
        stream_name: str,
        instance_name: str,
        tables: List[Tuple[str, str, str]],
        sequence_name: str,
        writers: List[str],
        positive: bool = True,
    ):
        self._db = db
        self._stream_name = stream_name
        self._instance_name = instance_name
        self._positive = positive
        self._writers = writers
        self._return_factor = 1 if positive else -1

        # We lock as some functions may be called from DB threads.
        self._lock = threading.Lock()

        # Note: If we are a negative stream then we still store all the IDs as
        # positive to make life easier for us, and simply negate the IDs when we
        # return them.
        self._current_positions = {}  # type: Dict[str, int]

        # Set of local IDs that we're still processing. The current position
        # should be less than the minimum of this set (if not empty).
        self._unfinished_ids = set()  # type: Set[int]

        # Set of local IDs that we've processed that are larger than the current
        # position, due to there being smaller unpersisted IDs.
        self._finished_ids = set()  # type: Set[int]

        # We track the max position where we know everything before has been
        # persisted. This is done by a) looking at the min across all instances
        # and b) noting that if we have seen a run of persisted positions
        # without gaps (e.g. 5, 6, 7) then we can skip forward (e.g. to 7).
        #
        # Note: There is no guarantee that the IDs generated by the sequence
        # will be gapless; gaps can form when e.g. a transaction was rolled
        # back. This means that sometimes we won't be able to skip forward the
        # position even though everything has been persisted. However, since
        # gaps should be relatively rare it's still worth doing the book keeping
        # that allows us to skip forwards when there are gapless runs of
        # positions.
        #
        # We start at 1 here as a) the first generated stream ID will be 2, and
        # b) other parts of the code assume that stream IDs are strictly greater
        # than 0.
        self._persisted_upto_position = (min(self._current_positions.values())
                                         if self._current_positions else 1)
        self._known_persisted_positions = []  # type: List[int]

        self._sequence_gen = PostgresSequenceGenerator(sequence_name)

        # We check that the table and sequence haven't diverged.
        for table, _, id_column in tables:
            self._sequence_gen.check_consistency(
                db_conn,
                table=table,
                id_column=id_column,
                stream_name=stream_name,
                positive=positive,
            )

        # This goes and fills out the above state from the database.
        self._load_current_ids(db_conn, tables)