def _start_collectors(self, master_state, slave_state):
    """Spawn watcher threads for both file systems and apply their deltas.

    Starts one daemon thread per sync direction (master->slave and
    slave->master); each pushes incremental deltas onto ``self._queue``.
    This method then blocks forever, draining the queue and applying each
    delta to its destination FS.  It never returns under normal operation.

    :param master_state: the State object tracking the master FS.
    :param slave_state: the State object tracking the slave FS.
    """

    def collect_deltas(source, dest):
        # *source* and *dest* are (fs, state) pairs; poll the source FS
        # and forward every non-empty delta to the main queue.
        for delta in source[0].get_changes():
            if delta:
                LOGGER.debug(
                    f"Incremental delta received from {source[0]}:\n{delta}"
                )
                self._queue.put((delta, source, dest))
        LOGGER.trace("Collector thread is terminating.")

    watches = [
        threading.Thread(
            name="Watch-MS",
            target=collect_deltas,
            args=((self.master_fs, master_state), (self.slave_fs, slave_state)),
        ),
        threading.Thread(
            name="Watch-SM",
            target=collect_deltas,
            args=((self.slave_fs, slave_state), (self.master_fs, master_state)),
        ),
    ]

    for watch in watches:
        watch.daemon = True  # Kill with main thread
        watch.start()

    # Serialise the application of deltas coming from either collector.
    # This loop never terminates, so the daemon threads die with the main
    # thread.  (A trailing ``watch.join()`` loop used to follow it but was
    # unreachable dead code and has been removed.)
    while True:
        LOGGER.info("Watching for FS state changes")
        delta, source, dest = self._queue.get()
        delta.apply(source, dest)
        LOGGER.debug(f"Incremental delta applied to {dest[0]}")
def read(self, path):
    """Download the remote file at *path* and return it as a rewound
    ``io.BytesIO`` stream.

    :raises FileNotFoundError: if no file exists at *path*.
    """
    remote = self.search(path)
    if not remote:
        raise FileNotFoundError(path)

    LOGGER.info(f"Downloading {remote} at {path}")

    stream = io.BytesIO()
    self._download(self._drive.files().get_media(fileId=remote._id), stream)

    # TODO: This code should be fixed in order to support Google Docs:
    # a Google Doc cannot be fetched with get_media and must be exported
    # with files().export_media instead.  Previous attempt, kept for
    # reference:
    #
    # except HttpError as e:
    #     LOGGER.error(f"Error reading {remote}. Cause: {e}")
    #     # Export a Google Doc file
    #     if e.resp.status == 403:
    #         pass
    #     try:
    #         self._download(
    #             self._drive.files().export_media(
    #                 fileId=remote.id, mimeType=remote.mime_type
    #             ),
    #             stream,
    #         )
    #     except HttpError as f:
    #         if e.resp.status != 403:
    #             raise

    stream.flush()
    stream.seek(0)
    return stream
def collect_deltas(source, dest):
    """Forward every non-empty delta produced by the source FS to the
    sync queue.

    Both *source* and *dest* are ``(fs, state)`` pairs; only the FS half
    of *source* is polled here.  ``self`` is a free variable from the
    enclosing scope.
    """
    src_fs = source[0]
    for delta in src_fs.get_changes():
        if not delta:
            continue
        LOGGER.debug(f"Incremental delta received from {src_fs}:\n{delta}")
        self._queue.put((delta, source, dest))
    LOGGER.trace("Collector thread is terminating.")
def remove(self, path):
    """Delete the local file or directory tree at *path*.

    Directories are removed recursively; a missing path is a no-op.
    """
    target = self._abs_path(path)
    LOGGER.debug(f"Removing local file at {path}")
    try:
        os.remove(target)
    except IsADirectoryError:
        # Not a plain file: fall back to recursive directory removal.
        rmtree(target)
    except FileNotFoundError:
        # Already gone — nothing to do.
        pass
def __exit__(self, exc_type, exc_value, traceback):
    # Restore the signal handlers that were swapped out on entry, if any.
    if self._orig_sig_handlers:
        LOGGER.debug("Restoring signal handlers")
        for h, s in zip(self._orig_sig_handlers, self.SIGNALS):
            signal.signal(s, h)
    # Any unexpected exception triggers an emergency-shutdown message.
    # FSNotReady is excluded: it is expected during startup retries.
    # NOTE(review): re-raising exc_value here is redundant — returning a
    # falsy value from __exit__ already lets the exception propagate —
    # and the explicit raise replaces the original traceback; confirm
    # this is intentional.
    if exc_value and exc_type not in (FSNotReady, ):
        LOGGER.critical(
            f"Emergency shutdown. Current FS states persisted. Cause: {exc_value}"
        )
        raise exc_value
def load_fs_states(self, alias=None):
    """Restore the last persisted states of both file systems.

    If *alias* is omitted, the configuration is required to hold exactly
    one entry, whose key becomes the alias.  The pickle paths are
    remembered on the instance so the states can be written back later.

    :return: the ``(master_state, slave_state)`` pair.
    """
    if not alias:
        # Tuple unpacking enforces that exactly one alias is configured.
        alias, = self._config.keys()

    self._master_state_file = os.path.join(STATES_DIR, f"{alias}_master.pickle")
    self._slave_state_file = os.path.join(STATES_DIR, f"{alias}_slave.pickle")

    master_state = State.load(self._master_state_file)
    LOGGER.debug("Previous state of Master FS loaded")
    slave_state = State.load(self._slave_state_file)
    LOGGER.debug("Previous state of Slave FS loaded")

    return master_state, slave_state
def wrapper(*args, **kwargs):
    # Shield a call to *f* from transient network failures.
    # NOTE(review): f, CONNECTED, HttpError, ServerNotFoundError and
    # suppresserror are free variables from the enclosing decorator/module
    # scope.
    try:
        # Block until connectivity has been (re)established.
        CONNECTED.wait()
        return f(*args, **kwargs)
    except HttpError as e:
        # HTTP-level errors are logged and swallowed; the call then
        # implicitly returns None.
        LOGGER.warning(
            f"HTTP error {e.resp.status} suppressed after call to {f} with arguments "
            f"{args}, {kwargs}: {e}"
        )
    except ServerNotFoundError as e:
        # Connectivity loss: mark the connection as down, wait for it to
        # come back, then retry once more through the decorator.
        LOGGER.error(
            f"Cannot call {f} with arguments {args}, {kwargs} at this time. "
            f"Reason: {e}"
        )
        CONNECTED.clear()
        CONNECTED.wait()
        return suppresserror(f)(*args, **kwargs)
def func_wrapper(*args, **kwargs):
    """Retry *f* with capped exponential backoff while a file system
    reports that it is not ready.

    ``f``, ``delay``, ``cap`` and ``ratio`` are free variables from the
    enclosing decorator scope.  Exceptions other than ``FSNotReady``
    propagate unchanged.
    """
    backoff = delay
    while True:
        try:
            # A successful call ends the retry loop immediately.  (The
            # original bookkeeping flag that reset the backoff on exit
            # was unobservable, since backoff is local.)
            return f(*args, **kwargs)
        except FSNotReady as e:
            LOGGER.error(f"A file system is not ready yet: {e}")
            LOGGER.info(
                f"A new start attempt will be made in {int(backoff)} seconds"
            )
            sleep(backoff)
            backoff = min(cap, backoff * ratio)
def get_changes(self):
    """Generator that yields batches of Drive changes forever.

    Polls the Drive changes helper under STATE_LOCK, sleeping ``backoff``
    seconds between polls.  On network failure the backoff grows
    geometrically (golden-ratio factor); on success it resets to 5
    seconds and the global CONNECTED event is set.
    """
    backoff = 5
    while True:
        LOGGER.trace(f"Getting Drive changes (backoff: {backoff})")
        try:
            with STATE_LOCK:
                # NOTE(review): the generator is suspended *inside* the
                # with block, so STATE_LOCK stays held while the consumer
                # processes the yielded value — confirm this is intended.
                yield self._get_changes()
            backoff = 5
            CONNECTED.set()
        except ServerNotFoundError:
            # Grow the retry delay by the golden ratio on each failure.
            backoff *= 1.618
            LOGGER.error(
                f"The Google Drive API is unreachable. Retrying in {int(backoff)} seconds."
            )
        finally:
            sleep(backoff)
def resolve_conflicts(self, master_deltas, slave_deltas):
    """Resolve conflicts between the deltas accumulated on both file
    systems since the last run.

    Master wins every conflict: the slave's version of a conflicting file
    is first backed up to a conflict path, after which the master copy
    can safely be applied over it.

    :param master_deltas: deltas computed on the master FS since last boot.
    :param slave_deltas: deltas computed on the slave FS since last boot.
    :raises RuntimeError: if a file that master reports as added/moved
        cannot be found on the master FS.
    """
    # mc/sc: paths touched on both sides, as seen from the master and the
    # slave deltas respectively (potential conflicts).
    mc, sc = master_deltas & slave_deltas
    if mc or sc:
        LOGGER.debug(
            f"Detected possible conflicts since last boot. Master: {mc}; Slave {sc}"
        )

    def move_conflict(path):
        # Back up the slave's version of a conflicting file before the
        # master's version is applied over it.
        conflict_path = self.slave_fs.conflict(path)
        self.slave_fs.copy(path, conflict_path)
        LOGGER.info(
            f"Conflicting file on slave backed up: {path} -> {conflict_path}"
        )

    # Files removed on master but also changed on slave: preserve the
    # slave's copy before the removal is propagated.
    for path in [p for p in master_deltas.removed if p in sc]:
        move_conflict(path)

    # Files added on master that were also changed on slave.
    for path in [p for p in master_deltas.added if p in sc]:
        # BUG FIX: this previously searched the *slave* FS, which then
        # compared the slave file with itself, so a genuine conflict was
        # never detected (and the "Master file" error message could never
        # be accurate).  Search the master FS, as the moved-files branch
        # below already does.
        master_file = self.master_fs.search(path)
        if not master_file:
            raise RuntimeError("Master file is unexpectedly missing.")
        if not master_file & self.slave_fs.search(path):
            # File is different, so slave file is conflict and we copy
            # master file over.
            move_conflict(path)

    for src, dst in master_deltas.moved:
        # src file has been moved/removed
        slave_src_file = self.slave_fs.search(src)
        if slave_src_file and src in sc:
            # Conflict master -> slave
            move_conflict(src)

        # dst file has been created/modified
        master_dst_file = self.master_fs.search(dst)
        if not master_dst_file:
            raise RuntimeError("Master file is unexpectedly missing.")
        slave_dst_file = self.slave_fs.search(dst)
        if slave_dst_file and dst in sc and not (master_dst_file & slave_dst_file):
            # File is different, so slave file is conflict and we copy
            # master file over.
            move_conflict(dst)
def _save_states(self, signum=None, frame=None):
    # Persist both FS states to their pickle files.  Also doubles as a
    # signal handler: when *signum* is set, the process exits with the
    # signal number as its exit code after saving.
    if signum:
        print("")  # Move past a possible ^C echoed on the terminal.
        # NOTE(review): LOGGER.warn is a deprecated alias of
        # LOGGER.warning in the stdlib logging module.
        LOGGER.warn(
            f"Received termination signal ({signum}). Shutting down...")
    # States may be None if shutdown happens before they were loaded.
    if self._master_state:
        self._master_state.save(self._master_state_file)
        LOGGER.info("Master FS state saved")
        self._slave_state.save(self._slave_state_file)
        LOGGER.info("Slave FS state saved")
    if signum:
        exit(signum)
def move(self, src: str, dst: str):
    """Move the local file at *src* to *dst*.

    A vanished source is treated as a no-op.
    """
    try:
        LOGGER.debug(f"Moving local file {src} to {dst}")
        source, destination = self._abs_path(src), self._abs_path(dst)
        move(source, destination)
    except FileNotFoundError:
        # Nothing to move.
        pass
def read(self, path):
    """Open the local file at *path* for binary reading.

    :return: the open file object, or ``None`` (with an error logged)
        when the file cannot be read.
    """
    abs_path = self._abs_path(path)
    try:
        return open(abs_path, "rb")
    except OSError:
        # Covers the original (FileNotFoundError, IOError) pair: IOError
        # is an alias of OSError and FileNotFoundError a subclass of it.
        LOGGER.error(f"Cannot read file {abs_path} from {self}")
        return None
def makedirs(self, path):
    """Create the local directory at *path*, including any missing
    parents; an existing directory is not an error.
    """
    LOGGER.debug(f"Creating local directory {path}")
    os.makedirs(self._abs_path(path), exist_ok=True)
def move_conflict(path):
    """Back up the slave's copy of *path* to its conflict path before the
    master's version is applied over it.  ``self`` is a free variable
    from the enclosing scope.
    """
    backup = self.slave_fs.conflict(path)
    self.slave_fs.copy(path, backup)
    LOGGER.info(
        f"Conflicting file on slave backed up: {path} -> {backup}"
    )
def start(self):
    """Boot the synchronisation engine.

    Loads the configuration, brings both file systems online, reconciles
    every change that happened while the process was not running
    (resolving conflicts in master's favour), then hands over to the
    live collector threads.  Does not return under normal operation.
    """
    with ErwinConfiguration() as config:
        LOGGER.info("Erwin configuration loaded successfully.")

        # Create master and slave FSs
        self.master_fs = GoogleDriveFS(**config.get_master_fs_params())
        LOGGER.info("Master FS is online.")
        LOGGER.debug(f"Created Master FS of type {type(self.master_fs)}")

        self.slave_fs = LocalFS(**config.get_slave_fs_params())
        LOGGER.info("Slave FS is online.")
        LOGGER.debug(f"Created Slave FS of type {type(self.slave_fs)}")

        # Load the previous state
        prev_master_state, prev_slave_state = config.load_fs_states()

        # Register signal handlers so the states are persisted on shutdown.
        config.register_state_handler(prev_master_state, prev_slave_state)
        LOGGER.info("Previous FS states loaded successfully.")

        # Compute deltas since last launch
        master_deltas = self.master_fs.state - prev_master_state
        LOGGER.debug(f"Master deltas since last state save:\n{master_deltas}")
        slave_deltas = self.slave_fs.state - prev_slave_state
        LOGGER.debug(f"Slave deltas since last state save:\n{slave_deltas}")

        # Back up conflicting slave files before master's changes land.
        self.resolve_conflicts(master_deltas, slave_deltas)

        # Master -> slave: replay everything that changed on master.
        master_deltas.apply(
            (self.master_fs, prev_master_state), (self.slave_fs, prev_slave_state)
        )
        # Sanity check: the tracked master state must now be in sync.
        if self.master_fs.state - prev_master_state:
            raise RuntimeError("Not all deltas applied correctly to master!")

        # At this point we do not expect to have any conflicts left as we
        # have resolved them at master before.  Slave -> master: push the
        # remaining local changes up.
        new_slave_deltas = self.slave_fs.state - prev_slave_state
        LOGGER.debug(f"New deltas:\n{new_slave_deltas}")
        new_slave_deltas.apply(
            (self.slave_fs, prev_slave_state), (self.master_fs, prev_master_state)
        )

        # Start the collectors to watch for changes on both FSs.
        self._start_collectors(prev_master_state, prev_slave_state)