def delete_remote_wal_before(self, wal_segment, site):
    """Delete WAL segments that precede *wal_segment* from the storage of *site*.

    The walk starts at the segment immediately before ``wal_segment`` and moves
    backwards one segment at a time, deleting each one.  When a segment is not
    found on the current timeline, the same segment number is tried on the
    previous timeline.  The walk stops when

    * there is no earlier segment (log 0, segment 0 has been reached),
    * a segment is missing on a timeline from which nothing has been deleted yet, or
    * a segment is missing and the timeline is already 1 or lower.

    Any other error raised by the storage is logged and the walk continues
    (best effort).

    Args:
        wal_segment: Name of the WAL segment to stop before; parsed with
            ``wal.name_to_tli_log_seg``.
        site: Site name; used to look up the storage object in
            ``self.site_transfers`` and as a component of the remote path.
    """
    self.log.debug("Starting WAL deletion from: %r before: %r", site, wal_segment)
    storage = self.site_transfers.get(site)
    valid_timeline = True
    tli, log, seg = wal.name_to_tli_log_seg(wal_segment)
    while True:
        if valid_timeline:
            # Decrement one segment if we're on a valid timeline
            if seg == 0 and log == 0:
                break
            seg, log = wal.get_previous_wal_on_same_timeline(seg, log)

        wal_path = os.path.join(
            self.config.get("path_prefix", ""),
            site,
            "xlog",
            wal.name_for_tli_log_seg(tli, log, seg),
        )
        self.log.debug("Deleting wal_file: %r", wal_path)
        try:
            storage.delete_key(wal_path)
            valid_timeline = True
        except FileNotFoundFromStorageError:
            if not valid_timeline or tli <= 1:
                # if we didn't find any WALs to delete on this timeline or we're already at
                # timeline 1 there's no need or possibility to try older timelines, break.
                self.log.info("Could not delete wal_file: %r, returning", wal_path)
                break
            # let's try the same segment number on a previous timeline, but flag that timeline
            # as "invalid" until we're able to delete at least one segment on it.
            valid_timeline = False
            tli -= 1
            self.log.info(
                "Could not delete wal_file: %r, trying the same segment on a previous "
                "timeline (%s)", wal_path, wal.name_for_tli_log_seg(tli, log, seg)
            )
        except Exception:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
            # Catch Exception rather than using a bare ``except:`` so that
            # KeyboardInterrupt and SystemExit still propagate and can stop the loop.
            self.log.exception("Problem deleting: %r", wal_path)
def delete_remote_wal_before(self, wal_segment, site, pg_version):
    """Remove WAL segments older than *wal_segment* from the remote storage of *site*.

    Segments are deleted one by one, walking backwards from the segment just
    before ``wal_segment``.  A missing segment makes the walk retry the same
    segment number on the previous timeline; if nothing has been deleted on the
    timeline being tried, or the timeline is already 1 or lower, the walk ends.
    Unexpected storage errors are logged, reported to ``self.metrics`` and the
    walk carries on.

    Args:
        wal_segment: Name of the WAL segment to stop before.
        site: Key into ``self.site_transfers`` and ``self.config["backup_sites"]``.
        pg_version: Passed through to ``wal.get_previous_wal_on_same_timeline``.
    """
    self.log.info("Starting WAL deletion from: %r before: %r, pg_version: %r", site, wal_segment, pg_version)
    transfer = self.site_transfers.get(site)
    timeline_is_valid = True
    timeline, log_id, seg_id = wal.name_to_tli_log_seg(wal_segment)

    while True:
        # Only step back a segment while the current timeline is known to be valid;
        # after a timeline switch the same segment number is retried first.
        if timeline_is_valid:
            if seg_id == 0 and log_id == 0:
                break
            seg_id, log_id = wal.get_previous_wal_on_same_timeline(seg_id, log_id, pg_version)

        site_prefix = self.config["backup_sites"][site]["prefix"]
        segment_name = wal.name_for_tli_log_seg(timeline, log_id, seg_id)
        wal_path = os.path.join(site_prefix, "xlog", segment_name)
        self.log.debug("Deleting wal_file: %r", wal_path)

        try:
            transfer.delete_key(wal_path)
        except FileNotFoundFromStorageError:
            if timeline_is_valid and timeline > 1:
                # Retry this segment number on the previous timeline, which counts as
                # "invalid" until at least one segment has been deleted from it.
                timeline_is_valid = False
                timeline -= 1
                self.log.info(
                    "Could not delete wal_file: %r, trying the same segment on a previous "
                    "timeline (%s)", wal_path, wal.name_for_tli_log_seg(timeline, log_id, seg_id)
                )
            else:
                # Nothing was found on this timeline, or there is no older timeline to try.
                self.log.info("Could not delete wal_file: %r, returning", wal_path)
                break
        except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
            self.log.exception("Problem deleting: %r", wal_path)
            self.metrics.unexpected_exception(ex, where="delete_remote_wal_before")
        else:
            timeline_is_valid = True
def run(self):
    """Receive replication messages and acknowledge uploaded WAL until stopped.

    After initialising the cursor (and the replication slot, when one is
    configured) and starting replication, the loop repeats while
    ``self.running`` is true:

    1. Read one replication message; its payload is appended to ``self.buffer``
       and its start position is acknowledged as ``write_lsn``.
    2. Call ``self.switch_xlog()`` when the message belongs to a different WAL
       name than ``self.latest_wal`` or the buffer has reached ``XLOG_SEG_SIZE``.
    3. Poll the queues in ``self.callbacks`` for finished transfers and send
       ``flush_lsn`` feedback for every completed position that is not ahead
       of a transfer still in progress.
    4. When no message was read, wait on the connection for up to the rest of
       ``KEEPALIVE_INTERVAL`` and send a keepalive if the wait times out.
    """
    self._init_cursor()
    if self.replication_slot:
        self.create_replication_slot()
    timeline = self.start_replication()

    while self.running:
        wal_name = None
        try:
            msg = self.c.read_message()
        except psycopg2.DatabaseError as ex:
            self.log.exception("Unexpected exception in reading walreceiver msg")
            self.stats.unexpected_exception(ex, where="walreceiver_run")
            continue
        self.log.debug("replication_msg: %r, buffer: %r/%r", msg, self.buffer.tell(), XLOG_SEG_SIZE)

        if msg:
            self.latest_activity = datetime.datetime.utcnow()
            log, _, seg = convert_integer_to_lsn(msg.data_start)
            wal_name = name_for_tli_log_seg(timeline, log, seg)

            if not self.latest_wal:
                self.latest_wal_start = msg.data_start
                self.latest_wal = wal_name
            self.buffer.write(msg.payload)

            # TODO: Calculate end pos and transmit that?
            msg.cursor.send_feedback(write_lsn=msg.data_start)

        # Parentheses spell out the precedence the original expression relied on.
        if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= XLOG_SEG_SIZE:
            self.switch_xlog()

        for wal_start, queue in self.callbacks.items():
            with suppress(Empty):
                transfer_result = queue.get_nowait()
                self.log.debug("Transfer result: %r", transfer_result)
                self.completed_wal_segments.add(wal_start)

        for completed_lsn in sorted(self.completed_wal_segments):
            # A completed lsn that is still waiting for an earlier transfer stays in
            # completed_wal_segments across loop passes although its callback was
            # already removed, so the removal must tolerate a missing key.
            self.callbacks.pop(completed_lsn, None)
            if self.callbacks and completed_lsn > min(self.callbacks):
                continue  # A smaller lsn is still being transferred, keep waiting
            # No earlier on-going transfer, just advance flush_lsn
            self.c.send_feedback(flush_lsn=completed_lsn)
            self.completed_wal_segments.discard(completed_lsn)
            self.last_flushed_lsn = completed_lsn
            self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

        if not msg:
            timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
            with suppress(InterruptedError):
                if not any(select.select([self.c], [], [], max(0, timeout))):
                    self.c.send_feedback()  # timing out, send keepalive
def run(self):
    """Receive replication messages and acknowledge uploaded WAL until stopped.

    After initialising the cursor (and the replication slot, when one is
    configured) and starting replication, the loop repeats while
    ``self.running`` is true:

    1. Read one replication message; its payload is appended to ``self.buffer``
       and its start position is acknowledged as ``write_lsn``.
    2. Call ``self.switch_wal()`` when the message belongs to a different WAL
       name than ``self.latest_wal`` or the buffer has reached ``WAL_SEG_SIZE``.
    3. Poll the queues in ``self.callbacks`` for finished transfers and send
       ``flush_lsn`` feedback for every completed position that is not ahead
       of a transfer still in progress.
    4. When no message was read, wait on the connection for up to the rest of
       ``KEEPALIVE_INTERVAL`` and send a keepalive if the wait times out.
    """
    self._init_cursor()
    if self.replication_slot:
        self.create_replication_slot()
    timeline = self.start_replication()

    while self.running:
        wal_name = None
        try:
            msg = self.c.read_message()
        except psycopg2.DatabaseError as ex:
            self.log.exception("Unexpected exception in reading walreceiver msg")
            self.stats.unexpected_exception(ex, where="walreceiver_run")
            continue
        self.log.debug("replication_msg: %r, buffer: %r/%r", msg, self.buffer.tell(), WAL_SEG_SIZE)

        if msg:
            self.latest_activity = datetime.datetime.utcnow()
            log, _, seg = convert_integer_to_lsn(msg.data_start)
            wal_name = name_for_tli_log_seg(timeline, log, seg)

            if not self.latest_wal:
                self.latest_wal_start = msg.data_start
                self.latest_wal = wal_name
            self.buffer.write(msg.payload)

            # TODO: Calculate end pos and transmit that?
            msg.cursor.send_feedback(write_lsn=msg.data_start)

        # Parentheses spell out the precedence the original expression relied on.
        if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= WAL_SEG_SIZE:
            self.switch_wal()

        for wal_start, queue in self.callbacks.items():
            with suppress(Empty):
                transfer_result = queue.get_nowait()
                self.log.debug("Transfer result: %r", transfer_result)
                self.completed_wal_segments.add(wal_start)

        for completed_lsn in sorted(self.completed_wal_segments):
            # A completed lsn that is still waiting for an earlier transfer stays in
            # completed_wal_segments across loop passes although its callback was
            # already removed, so the removal must tolerate a missing key.
            self.callbacks.pop(completed_lsn, None)
            if self.callbacks and completed_lsn > min(self.callbacks):
                continue  # A smaller lsn is still being transferred, keep waiting
            # No earlier on-going transfer, just advance flush_lsn
            self.c.send_feedback(flush_lsn=completed_lsn)
            self.completed_wal_segments.discard(completed_lsn)
            self.last_flushed_lsn = completed_lsn
            self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

        if not msg:
            timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
            with suppress(InterruptedError):
                if not any(select.select([self.c], [], [], max(0, timeout))):
                    self.c.send_feedback()  # timing out, send keepalive
def test_construct_wal_name():
    """Check construct_wal_name for timeline "4" and xlogpos "F/190001B0".

    The result must equal both the name built from (4, 0xF, 0x19) and the
    literal segment file name.
    """
    system_info = dict(
        dbname="",
        systemid="6181331723016416192",
        timeline="4",
        xlogpos="F/190001B0",
    )

    # First against the name assembled from the individual components ...
    constructed = wal.construct_wal_name(system_info)
    from_components = wal.name_for_tli_log_seg(4, 0xF, 0x19)
    assert constructed == from_components

    # ... then against the exact expected file name.
    literal_name = "000000040000000F00000019"
    assert wal.construct_wal_name(system_info) == literal_name