def start_replication(self):
    """Issue START_REPLICATION on the replication cursor and return the timeline.

    Runs ``IDENTIFY_SYSTEM``, takes the second field of the reply as the
    timeline, fetches the timeline history files for it and then starts
    streaming. The start position is ``self.last_flushed_lsn`` when that is
    set; otherwise it is derived from the third field of the
    ``IDENTIFY_SYSTEM`` reply via ``get_lsn_from_start_of_wal_file``. The
    replication slot is only passed along when ``self.replication_slot`` is
    set.

    Returns:
        The timeline reported by ``IDENTIFY_SYSTEM``.
    """
    # TODO: When pghoard restarts we might want to resume from the position of
    # the pghoard slot rather than from the current position at the time
    # replication starts. The slot position is unfortunately not available on
    # the replication protocol side and would have to be queried through a
    # regular PG connection. Currently we work around this by reading it back
    # from pghoard's state file.
    self.c.execute("IDENTIFY_SYSTEM")
    system_info = self.c.fetchone()
    self.log.debug("System identified itself as: %r", system_info)
    timeline = system_info[1]
    self.fetch_timeline_history_files(timeline)

    # Work out the LSN to start replicating from.
    if not self.last_flushed_lsn:
        lsn = get_lsn_from_start_of_wal_file(system_info[2])
    else:
        log, pos, _seg = convert_integer_to_lsn(self.last_flushed_lsn)
        lsn = "{:X}/{:X}".format(log, pos)

    self.log.info(
        "Starting replication from %r, timeline: %r with slot: %r",
        lsn,
        timeline,
        self.replication_slot,
    )

    # Build the keyword arguments once; the slot arguments are optional.
    replication_kwargs = {"start_lsn": lsn, "timeline": timeline}
    if self.replication_slot:
        replication_kwargs["slot_name"] = self.replication_slot
        replication_kwargs["slot_type"] = REPLICATION_PHYSICAL
    self.c.start_replication(**replication_kwargs)
    return timeline
def start_replication(self):
    """Start streaming WAL on the replication cursor and return the timeline.

    The server is asked to identify itself with ``IDENTIFY_SYSTEM``; the
    second field of the reply is used as the timeline and the timeline
    history files for it are fetched. Streaming then starts either from
    ``self.last_flushed_lsn`` (when set) or from the position computed by
    ``get_lsn_from_start_of_wal_file`` on the third field of the reply.
    ``self.replication_slot`` is forwarded as a physical slot when set.

    Returns:
        The timeline reported by ``IDENTIFY_SYSTEM``.
    """
    # TODO: On a pghoard restart it may be preferable to read from where the
    # pghoard slot is instead of always reading from the current position at
    # the time of starting replication. The slot position is unfortunately not
    # available on the replication protocol side and would have to be queried
    # through a regular PG connection. Currently we work around this by
    # reading it back from pghoard's state file.
    cursor = self.c
    cursor.execute("IDENTIFY_SYSTEM")
    reply = cursor.fetchone()
    self.log.debug("System identified itself as: %r", reply)
    timeline = reply[1]
    self.fetch_timeline_history_files(timeline)

    # Decide which LSN replication should start from.
    if not self.last_flushed_lsn:
        lsn = get_lsn_from_start_of_wal_file(reply[2])
    else:
        log, pos, _seg = convert_integer_to_lsn(self.last_flushed_lsn)
        lsn = "{:X}/{:X}".format(log, pos)

    self.log.info(
        "Starting replication from %r, timeline: %r with slot: %r",
        lsn,
        timeline,
        self.replication_slot,
    )

    # Slot arguments are only supplied when a slot is configured.
    start_args = {"start_lsn": lsn, "timeline": timeline}
    if self.replication_slot:
        start_args["slot_name"] = self.replication_slot
        start_args["slot_type"] = REPLICATION_PHYSICAL
    self.c.start_replication(**start_args)
    return timeline
def run(self):
    """Receive replication messages and track finished segments until stopped.

    After initialising the cursor, optionally creating the replication slot
    and starting replication, every iteration of the loop (which runs while
    ``self.running`` is true):

    1. reads one replication message; a ``psycopg2.DatabaseError`` is
       logged, reported to ``self.stats`` and the read is retried,
    2. for a non-empty message, appends its payload to ``self.buffer`` and
       reports the message's ``data_start`` back as ``write_lsn``,
    3. calls ``self.switch_xlog()`` when the message maps to a different WAL
       file name than ``self.latest_wal`` or the buffer has reached
       ``XLOG_SEG_SIZE``,
    4. polls the queues in ``self.callbacks`` for finished transfers and
       sends ``flush_lsn`` feedback for each completed segment that is not
       preceded by a still-pending transfer with a smaller start LSN,
    5. when no message was read, waits on the connection for the remainder
       of ``KEEPALIVE_INTERVAL`` and sends a keepalive if nothing arrives.
    """
    self._init_cursor()
    if self.replication_slot:
        self.create_replication_slot()
    timeline = self.start_replication()

    while self.running:
        wal_name = None
        try:
            msg = self.c.read_message()
        except psycopg2.DatabaseError as ex:
            self.log.exception("Unexpected exception in reading walreceiver msg")
            self.stats.unexpected_exception(ex, where="walreceiver_run")
            continue
        self.log.debug("replication_msg: %r, buffer: %r/%r", msg, self.buffer.tell(), XLOG_SEG_SIZE)

        if msg:
            self.latest_activity = datetime.datetime.utcnow()
            log, _, seg = convert_integer_to_lsn(msg.data_start)
            wal_name = name_for_tli_log_seg(timeline, log, seg)
            if not self.latest_wal:
                # First message since the last switch: remember where this
                # segment started and what it is called.
                self.latest_wal_start = msg.data_start
                self.latest_wal = wal_name
            self.buffer.write(msg.payload)
            # TODO: Calculate end pos and transmit that?
            msg.cursor.send_feedback(write_lsn=msg.data_start)

        # Parentheses only make the existing and/or precedence explicit.
        if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= XLOG_SEG_SIZE:
            self.switch_xlog()

        # Collect transfers that have finished since the last iteration.
        for wal_start, result_queue in self.callbacks.items():
            with suppress(Empty):
                transfer_result = result_queue.get_nowait()
                self.log.debug("Transfer result: %r", transfer_result)
                self.completed_wal_segments.add(wal_start)

        for completed_lsn in sorted(self.completed_wal_segments):
            # A segment that was held back on an earlier iteration has
            # already been removed from the callbacks, so a missing key must
            # be tolerated here; a bare pop() would raise KeyError.
            self.callbacks.pop(completed_lsn, None)
            if self.callbacks and completed_lsn > min(self.callbacks):
                # A smaller lsn is still being transferred; keep this one in
                # completed_wal_segments and retry on a later iteration.
                continue
            # Nothing earlier is pending, so the flush position can advance.
            self.c.send_feedback(flush_lsn=completed_lsn)
            self.completed_wal_segments.discard(completed_lsn)
            self.last_flushed_lsn = completed_lsn
            self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

        if not msg:
            elapsed = (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
            timeout = KEEPALIVE_INTERVAL - elapsed
            with suppress(InterruptedError):
                if not any(select.select([self.c], [], [], max(0, timeout))):
                    self.c.send_feedback()  # timing out, send keepalive
def run(self):
    """Receive replication messages and track finished segments until stopped.

    After initialising the cursor, optionally creating the replication slot
    and starting replication, every iteration of the loop (which runs while
    ``self.running`` is true):

    1. reads one replication message; a ``psycopg2.DatabaseError`` is
       logged, reported to ``self.stats`` and the read is retried,
    2. for a non-empty message, appends its payload to ``self.buffer`` and
       reports the message's ``data_start`` back as ``write_lsn``,
    3. calls ``self.switch_wal()`` when the message maps to a different WAL
       file name than ``self.latest_wal`` or the buffer has reached
       ``WAL_SEG_SIZE``,
    4. polls the queues in ``self.callbacks`` for finished transfers and
       sends ``flush_lsn`` feedback for each completed segment that is not
       preceded by a still-pending transfer with a smaller start LSN,
    5. when no message was read, waits on the connection for the remainder
       of ``KEEPALIVE_INTERVAL`` and sends a keepalive if nothing arrives.
    """
    self._init_cursor()
    if self.replication_slot:
        self.create_replication_slot()
    timeline = self.start_replication()

    while self.running:
        wal_name = None
        try:
            msg = self.c.read_message()
        except psycopg2.DatabaseError as ex:
            self.log.exception("Unexpected exception in reading walreceiver msg")
            self.stats.unexpected_exception(ex, where="walreceiver_run")
            continue
        self.log.debug("replication_msg: %r, buffer: %r/%r", msg, self.buffer.tell(), WAL_SEG_SIZE)

        if msg:
            self.latest_activity = datetime.datetime.utcnow()
            log, _, seg = convert_integer_to_lsn(msg.data_start)
            wal_name = name_for_tli_log_seg(timeline, log, seg)
            if not self.latest_wal:
                # First message since the last switch: remember where this
                # segment started and what it is called.
                self.latest_wal_start = msg.data_start
                self.latest_wal = wal_name
            self.buffer.write(msg.payload)
            # TODO: Calculate end pos and transmit that?
            msg.cursor.send_feedback(write_lsn=msg.data_start)

        # Parentheses only make the existing and/or precedence explicit.
        if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= WAL_SEG_SIZE:
            self.switch_wal()

        # Collect transfers that have finished since the last iteration.
        for wal_start, result_queue in self.callbacks.items():
            with suppress(Empty):
                transfer_result = result_queue.get_nowait()
                self.log.debug("Transfer result: %r", transfer_result)
                self.completed_wal_segments.add(wal_start)

        for completed_lsn in sorted(self.completed_wal_segments):
            # A segment that was held back on an earlier iteration has
            # already been removed from the callbacks, so a missing key must
            # be tolerated here; a bare pop() would raise KeyError.
            self.callbacks.pop(completed_lsn, None)
            if self.callbacks and completed_lsn > min(self.callbacks):
                # A smaller lsn is still being transferred; keep this one in
                # completed_wal_segments and retry on a later iteration.
                continue
            # Nothing earlier is pending, so the flush position can advance.
            self.c.send_feedback(flush_lsn=completed_lsn)
            self.completed_wal_segments.discard(completed_lsn)
            self.last_flushed_lsn = completed_lsn
            self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

        if not msg:
            elapsed = (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
            timeout = KEEPALIVE_INTERVAL - elapsed
            with suppress(InterruptedError):
                if not any(select.select([self.c], [], [], max(0, timeout))):
                    self.c.send_feedback()  # timing out, send keepalive