Esempio n. 1
0
    def start_replication(self):
        """Start replication on ``self.c`` and return the server's timeline.

        Runs ``IDENTIFY_SYSTEM``, fetches the timeline history files for the
        reported timeline, picks a start LSN and then calls
        ``self.c.start_replication``. A slot name and the physical slot type
        are passed only when ``self.replication_slot`` is set.

        Returns:
            The timeline taken from the second field of the
            ``IDENTIFY_SYSTEM`` reply.
        """
        # TODO: Ideally a restarted pghoard would resume from the position the
        # pghoard slot is at rather than from the server's current position at
        # the time replication starts. The slot position is not exposed over
        # the replication protocol and would need a regular PG connection to
        # query, so for now the position is read back from pghoard's state file.
        self.c.execute("IDENTIFY_SYSTEM")
        system_info = self.c.fetchone()
        self.log.debug("System identified itself as: %r", system_info)
        timeline = system_info[1]
        self.fetch_timeline_history_files(timeline)

        # Choose the start position: the last flushed LSN when one is known,
        # otherwise one derived from the third IDENTIFY_SYSTEM field
        # (presumably the server's current WAL position -- confirm against
        # the PostgreSQL replication protocol docs).
        if not self.last_flushed_lsn:
            start_lsn = get_lsn_from_start_of_wal_file(system_info[2])
        else:
            log, pos, _ = convert_integer_to_lsn(self.last_flushed_lsn)
            start_lsn = "{:X}/{:X}".format(log, pos)

        self.log.info("Starting replication from %r, timeline: %r with slot: %r",
                      start_lsn, timeline, self.replication_slot)

        # Assemble the call arguments once; the slot-related ones are optional.
        replication_args = {"start_lsn": start_lsn, "timeline": timeline}
        if self.replication_slot:
            replication_args["slot_name"] = self.replication_slot
            replication_args["slot_type"] = REPLICATION_PHYSICAL
        self.c.start_replication(**replication_args)
        return timeline
Esempio n. 2
0
    def start_replication(self):
        """Identify the server, choose a start LSN and start replication.

        The start LSN is ``self.last_flushed_lsn`` formatted as ``"X/X"`` hex
        when that attribute is truthy; otherwise it is computed by
        ``get_lsn_from_start_of_wal_file`` from the third field of the
        ``IDENTIFY_SYSTEM`` reply.

        Returns:
            The timeline reported in the second field of the
            ``IDENTIFY_SYSTEM`` reply.
        """
        # TODO: On restart it would be better to continue from wherever the
        # pghoard slot currently is instead of from the server's position at
        # the moment replication starts. That position cannot be obtained via
        # the replication protocol (a regular PG connection would be needed),
        # so the workaround is to read it back from pghoard's state file.
        cursor = self.c
        cursor.execute("IDENTIFY_SYSTEM")
        reply = cursor.fetchone()
        self.log.debug("System identified itself as: %r", reply)
        timeline = reply[1]
        self.fetch_timeline_history_files(timeline)

        # Work out where streaming should begin.
        flushed = self.last_flushed_lsn
        if flushed:
            high, low, _ = convert_integer_to_lsn(flushed)
            begin_at = "{:X}/{:X}".format(high, low)
        else:
            begin_at = get_lsn_from_start_of_wal_file(reply[2])

        self.log.info(
            "Starting replication from %r, timeline: %r with slot: %r",
            begin_at, timeline, self.replication_slot)

        # Without a configured slot, stream directly and finish early.
        if not self.replication_slot:
            cursor.start_replication(start_lsn=begin_at, timeline=timeline)
            return timeline

        cursor.start_replication(
            slot_name=self.replication_slot,
            slot_type=REPLICATION_PHYSICAL,
            start_lsn=begin_at,
            timeline=timeline,
        )
        return timeline
Esempio n. 3
0
    def run(self):
        """Receive WAL from the server until ``self.running`` becomes false.

        Initialises the cursor, calls ``create_replication_slot`` when
        ``self.replication_slot`` is set, starts replication and then loops:

        1. Read one replication message; database errors are logged, reported
           to ``self.stats`` and the iteration is retried.
        2. Append the message payload to ``self.buffer`` and acknowledge it
           with ``write_lsn`` feedback.
        3. Call ``switch_xlog`` when the message belongs to a different WAL
           file than ``self.latest_wal`` or the buffer has reached
           ``XLOG_SEG_SIZE``.
        4. Collect finished transfers from the queues in ``self.callbacks``
           and send ``flush_lsn`` feedback for every completed LSN that has
           no smaller LSN still in flight.
        5. When no message was available, wait on the connection for up to
           the remaining keepalive interval and send a keepalive on timeout.
        """
        self._init_cursor()
        if self.replication_slot:
            self.create_replication_slot()
        timeline = self.start_replication()
        while self.running:
            wal_name = None
            try:
                msg = self.c.read_message()
            except psycopg2.DatabaseError as ex:
                self.log.exception("Unexpected exception in reading walreceiver msg")
                self.stats.unexpected_exception(ex, where="walreceiver_run")
                continue
            self.log.debug("replication_msg: %r, buffer: %r/%r",
                           msg, self.buffer.tell(), XLOG_SEG_SIZE)
            if msg:
                self.latest_activity = datetime.datetime.utcnow()
                log, _, seg = convert_integer_to_lsn(msg.data_start)
                wal_name = name_for_tli_log_seg(timeline, log, seg)

                # First message since the last switch: remember which WAL
                # file the buffered data belongs to and where it starts.
                if not self.latest_wal:
                    self.latest_wal_start = msg.data_start
                    self.latest_wal = wal_name
                self.buffer.write(msg.payload)

                # TODO: Calculate end pos and transmit that?
                msg.cursor.send_feedback(write_lsn=msg.data_start)

            # Explicit parentheses: switch when the message moved on to a new
            # WAL file, or when the buffer holds a full segment.
            if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= XLOG_SEG_SIZE:
                self.switch_xlog()

            # Poll every pending transfer; an empty queue means not done yet.
            for wal_start, result_queue in self.callbacks.items():
                with suppress(Empty):
                    transfer_result = result_queue.get_nowait()
                    self.log.debug("Transfer result: %r", transfer_result)
                    self.completed_wal_segments.add(wal_start)

            for completed_lsn in sorted(self.completed_wal_segments):
                # A completed LSN that had to wait for a smaller in-flight LSN
                # stays in completed_wal_segments but was already removed from
                # callbacks on an earlier pass; pop() without a default would
                # raise KeyError when it is revisited.
                self.callbacks.pop(completed_lsn, None)
                if self.callbacks and completed_lsn > min(self.callbacks):
                    # A smaller LSN is still being transferred; flushing past
                    # it now would acknowledge data that is not stored yet.
                    continue
                # Nothing earlier is outstanding, so advance flush_lsn.
                self.c.send_feedback(flush_lsn=completed_lsn)
                self.completed_wal_segments.discard(completed_lsn)
                self.last_flushed_lsn = completed_lsn
                self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

            if not msg:
                # Idle: wait for the socket to become readable for whatever is
                # left of the keepalive interval since the last server I/O.
                timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
                with suppress(InterruptedError):
                    if not any(select.select([self.c], [], [], max(0, timeout))):
                        self.c.send_feedback()  # timing out, send keepalive
Esempio n. 4
0
    def run(self):
        """Receive WAL from the server until ``self.running`` becomes false.

        Initialises the cursor, calls ``create_replication_slot`` when
        ``self.replication_slot`` is set, starts replication and then loops:

        1. Read one replication message; database errors are logged, reported
           to ``self.stats`` and the iteration is retried.
        2. Append the message payload to ``self.buffer`` and acknowledge it
           with ``write_lsn`` feedback.
        3. Call ``switch_wal`` when the message belongs to a different WAL
           file than ``self.latest_wal`` or the buffer has reached
           ``WAL_SEG_SIZE``.
        4. Collect finished transfers from the queues in ``self.callbacks``
           and send ``flush_lsn`` feedback for every completed LSN that has
           no smaller LSN still in flight.
        5. When no message was available, wait on the connection for up to
           the remaining keepalive interval and send a keepalive on timeout.
        """
        self._init_cursor()
        if self.replication_slot:
            self.create_replication_slot()
        timeline = self.start_replication()
        while self.running:
            wal_name = None
            try:
                msg = self.c.read_message()
            except psycopg2.DatabaseError as ex:
                self.log.exception("Unexpected exception in reading walreceiver msg")
                self.stats.unexpected_exception(ex, where="walreceiver_run")
                continue
            self.log.debug("replication_msg: %r, buffer: %r/%r",
                           msg, self.buffer.tell(), WAL_SEG_SIZE)
            if msg:
                self.latest_activity = datetime.datetime.utcnow()
                log, _, seg = convert_integer_to_lsn(msg.data_start)
                wal_name = name_for_tli_log_seg(timeline, log, seg)

                # First message since the last switch: remember which WAL
                # file the buffered data belongs to and where it starts.
                if not self.latest_wal:
                    self.latest_wal_start = msg.data_start
                    self.latest_wal = wal_name
                self.buffer.write(msg.payload)

                # TODO: Calculate end pos and transmit that?
                msg.cursor.send_feedback(write_lsn=msg.data_start)

            # Explicit parentheses: switch when the message moved on to a new
            # WAL file, or when the buffer holds a full segment.
            if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= WAL_SEG_SIZE:
                self.switch_wal()

            # Poll every pending transfer; an empty queue means not done yet.
            for wal_start, result_queue in self.callbacks.items():
                with suppress(Empty):
                    transfer_result = result_queue.get_nowait()
                    self.log.debug("Transfer result: %r", transfer_result)
                    self.completed_wal_segments.add(wal_start)

            for completed_lsn in sorted(self.completed_wal_segments):
                # A completed LSN that had to wait for a smaller in-flight LSN
                # stays in completed_wal_segments but was already removed from
                # callbacks on an earlier pass; pop() without a default would
                # raise KeyError when it is revisited.
                self.callbacks.pop(completed_lsn, None)
                if self.callbacks and completed_lsn > min(self.callbacks):
                    # A smaller LSN is still being transferred; flushing past
                    # it now would acknowledge data that is not stored yet.
                    continue
                # Nothing earlier is outstanding, so advance flush_lsn.
                self.c.send_feedback(flush_lsn=completed_lsn)
                self.completed_wal_segments.discard(completed_lsn)
                self.last_flushed_lsn = completed_lsn
                self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

            if not msg:
                # Idle: wait for the socket to become readable for whatever is
                # left of the keepalive interval since the last server I/O.
                timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
                with suppress(InterruptedError):
                    if not any(select.select([self.c], [], [], max(0, timeout))):
                        self.c.send_feedback()  # timing out, send keepalive