class NetworkDiscovery:
    """ Discovers remote SyncAll instances on the same network. """

    def __init__(self, port, version, uuid):
        """
        Args:
            port: UDP port used for both listening and broadcasting.
            version: Protocol/application version sent with each request.
            uuid: Unique identifier of this instance; used to recognise
                and ignore our own broadcast packets.
        """
        self.logger = logging.getLogger(__name__)

        # Store UUID based on the hostname and current time to check
        # if the received broadcast packet is from self
        self.uuid = uuid
        # Notified with the remote's data whenever a new client is found.
        self.client_discovered = Event()
        self.port = port
        self.version = version

        self.__init_network_objects()

    def __init_network_objects(self):
        """Create the broadcast listener and the UDP sending socket."""
        self.listener = BroadcastListener(self.port)
        self.listener.packet_received += self.__receive_packet

        self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)

    def start_listening(self):
        self.listener.start()

    def shutdown(self):
        self.listener.shutdown()

        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except OSError:
            # Fix: was a bare `except:`. Only socket errors are expected
            # here (already closed / never connected); best-effort shutdown.
            pass

    def request(self):
        """ Sends a discovery request to all hosts on the LAN """
        self.__broadcast({
            'version': self.version,
            'uuid': self.uuid
        }, self.port)

    def __broadcast(self, data, port):
        self.socket.sendto(msgpack.packb(data), (BROADCAST_ADDRESS, port))

    def __receive_packet(self, data):
        # Drop packets we broadcast ourselves.
        if self.__is_self(data):
            return

        self.logger.debug("Received discovery request from {}"
                          .format(data['source']))

        del data['server']
        self.client_discovered.notify(data)

    def __is_self(self, data):
        # True when the packet's embedded UUID is our own.
        return data['data']['uuid'] == self.uuid
class NetworkDiscovery:
    """ Discovers remote SyncAll instances on the same network. """

    def __init__(self, port, version, uuid):
        self.logger = logging.getLogger(__name__)

        # UUID derived from hostname + current time; lets us recognise
        # and drop broadcast packets that originated from this instance.
        self.uuid = uuid
        self.client_discovered = Event()
        self.port = port
        self.version = version

        self.__init_network_objects()

    def __init_network_objects(self):
        # Receiving side: listens for broadcasts from other instances.
        listener = BroadcastListener(self.port)
        listener.packet_received += self.__receive_packet
        self.listener = listener

        # Sending side: a UDP socket configured for broadcasting.
        sender = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        for option in (socket.SO_REUSEADDR, socket.SO_BROADCAST):
            sender.setsockopt(socket.SOL_SOCKET, option, 1)
        self.socket = sender

    def start_listening(self):
        self.listener.start()

    def shutdown(self):
        self.listener.shutdown()

        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except BaseException:
            # Best-effort; the socket may already be closed.
            pass

    def request(self):
        """ Sends a discovery request to all hosts on the LAN """
        packet = {'version': self.version, 'uuid': self.uuid}
        self.__broadcast(packet, self.port)

    def __broadcast(self, data, port):
        encoded = msgpack.packb(data)
        self.socket.sendto(encoded, (BROADCAST_ADDRESS, port))

    def __receive_packet(self, data):
        # Ignore our own broadcasts.
        if self.__is_self(data):
            return

        self.logger.debug("Received discovery request from {}".format(
            data['source']))

        del data['server']
        self.client_discovered.notify(data)

    def __is_self(self, data):
        return self.uuid == data['data']['uuid']
class Directory:
    """
    Listens for file system changes in specific directory and applies
    changes from different sources.
    """

    # syncall's own artifacts (index/temp files) are never indexed.
    IGNORE_PATTERNS = r'\.syncall_.*'

    def __init__(self, uuid, dir_path, index_name='.syncall_index',
                 load_index=True, temp_dir_name='.syncall_temp',
                 create_temp_dir=False):
        """
        Args:
            uuid: UUID string identifying this local instance.
            dir_path: Path of the directory being managed.
            index_name: File name of the persisted index inside dir_path.
            load_index: Load the persisted index immediately when True.
            temp_dir_name: Name of the temp-file directory inside dir_path.
            create_temp_dir: Create the temp directory if it is missing.
        """
        self.logger = logging.getLogger(__name__)

        self.uuid = uuid
        self.dir_path = dir_path
        self.index_name = index_name
        self.index_path = os.path.join(self.dir_path, self.index_name)
        self.temp_dir = os.path.join(self.dir_path, temp_dir_name)
        self.last_update = datetime.now().timestamp()

        if create_temp_dir and not os.path.exists(self.temp_dir):
            os.mkdir(self.temp_dir)

        self.fs_access_lock = threading.Lock()
        self.temp_dir_lock = threading.Lock()
        self.temp_files = set()

        self.transfer_manager = syncall.TransferManager(self)

        self.index_updated = Event()
        # Contains tuple(uuid, file_name, file_index) as data
        self.transfer_finalized = Event()

        if load_index:
            self.load_index()
        else:
            self._index = dict()

    def get_last_update(self):
        return self.last_update

    def get_temp_path(self, proposed_name):
        """
        Return a path to a temp file that can be written to.

        Use `proposed_name` if it's available or modify it so it is.
        """
        proposed_name = os.path.basename(proposed_name)
        name = proposed_name
        file_suffix = 0

        with self.temp_dir_lock:
            while os.path.isfile(os.path.join(self.temp_dir, name)):
                file_suffix += 1
                name = "{}-{}".format(proposed_name, file_suffix)

            file_path = os.path.join(self.temp_dir, name)

            # Create the file to avoid possible race conditions
            # after the with block
            with open(file_path, 'a+'):
                pass

            self.temp_files.add(file_path)

        return file_path

    def release_temp_file(self, path):
        """ Remove a temp file created using `get_temp_path`. """
        if path in self.temp_files:
            # Fix: forget the path so the set doesn't grow without bound
            # and the name becomes reusable by get_temp_path().
            self.temp_files.discard(path)
            try:
                os.remove(path)
            except OSError:
                # Fix: was a bare `except:`. Best-effort — the file may
                # already be gone.
                pass

    def clear_temp_dir(self):
        """Remove every temp file handed out by `get_temp_path`."""
        # Iterate over a snapshot: release_temp_file() mutates the set.
        for path in list(self.temp_files):
            self.release_temp_file(path)

    def get_file_path(self, file_name):
        return os.path.join(self.dir_path, file_name)

    def get_block_checksums(self, file_name, block_size):
        """Return rsync block checksums for `file_name`, or [] when the
        file is unknown or marked deleted in the index."""
        with self.fs_access_lock:
            if file_name not in self._index:
                return []

            file_data = self._get_index_unsafe(file_name)

            if 'deleted' in file_data and file_data['deleted']:
                return []

            with open(self.get_file_path(file_name), 'rb') as file:
                block_checksums = list(
                    pyrsync2.blockchecksums(file, blocksize=block_size))

        return block_checksums

    def load_index(self):
        """Load the persisted index from disk (empty dict when missing)."""
        with self.fs_access_lock:
            if os.path.isfile(self.index_path):
                with open(self.index_path, 'rb') as index_file:
                    index = msgpack.unpackb(index_file.read())
                    # Decode the object to utf strings except
                    # the 'hash' values
                    self._index = bintools.decode_object(
                        index,
                        except_keys=('hash',))
            else:
                self._index = dict()

        self.last_update = datetime.now().timestamp()

    def get_index(self, file_name=None):
        with self.fs_access_lock:
            return self._get_index_unsafe(file_name=file_name)

    def _get_index_unsafe(self, file_name=None):
        # Caller must hold fs_access_lock. With no file_name, returns the
        # whole index; otherwise the file's entry or None when unknown.
        if file_name is None:
            return self._index
        elif file_name not in self._index:
            return None
        else:
            return self._index[file_name]

    def save_index(self):
        with self.fs_access_lock:
            index = msgpack.packb(self._index)

            with open(self.index_path, 'wb') as index_file:
                index_file.write(index)

    def update_index(self, save_index=True, force=False):
        """
        Update self._index (use the get_index() method to get it).
        Return True if index changed, False otherwise.

        The index structure is:
            <index> ::= {
                <file_name>: <file_info>,
                ...
            }
            <file_name> ::= file path relative to directory top
            <file_info> ::= {
                'sync_log': {
                    <remote_uuid (as string)>: <timestamp>,
                    ...
                },
                'last_update_location': <remote_uuid (or local UUID) (str)>
                'last_update': <timestamp>,
                'hash': <md5 byte-string>,
                [optional 'deleted': (True|False)]
            }
            <timestamp> ::= Datetime in unix timestamp (seconds).
                Depends on the os time on the system on which the change
                happened.
        """
        changes = set()

        with self.fs_access_lock:
            # Mark everything missing; entries still marked after the walk
            # correspond to files deleted from disk.
            for file_data in self._index.values():
                file_data['not_found'] = True

            for dirpath, dirnames, filenames in os.walk(self.dir_path):
                for name in filenames:
                    file_path = pathext.normalize(
                        os.path.join(dirpath, name))

                    if not re.search(self.IGNORE_PATTERNS, file_path):
                        self._update_file_index(file_path, changes)

            # Mark each deleted file with the current timestamp
            # and UUID to avoid conflicts and to propagate properly
            timestamp = datetime.now().timestamp()

            for file_name, file_data in self._index.items():
                if 'not_found' in file_data:
                    del file_data['not_found']

                    if 'deleted' in file_data and file_data['deleted']:
                        # File has been deleted some time ago...
                        continue

                    # File has been deleted now
                    file_data['deleted'] = True
                    file_data['last_update'] = timestamp
                    file_data['last_update_location'] = self.uuid
                    file_data['hash'] = b''

                    sync_log = file_data.setdefault('sync_log', dict())
                    sync_log[self.uuid] = timestamp

                    changes.add(file_name)

        if changes:
            self.last_update = datetime.now().timestamp()

        if save_index and changes:
            self.save_index()

        if force:
            self.index_updated.notify(None)
        elif changes:
            self.index_updated.notify(changes)

        # Fix: the docstring always promised a bool, but nothing was
        # returned before.
        return len(changes) > 0

    def _update_file_index(self, file_path, changes):
        """Refresh the index entry for one on-disk file, recording any
        change in `changes` (set of relative paths)."""
        relative_path = pathext.normalize(
            os.path.relpath(file_path, self.dir_path))
        file_data = self._index.setdefault(relative_path, dict())

        if not file_data:
            # New file
            file_hash = bintools.hash_file(file_path)
            file_data['last_update'] = int(os.path.getmtime(file_path))
            file_data['hash'] = file_hash
            file_data['last_update_location'] = self.uuid

            sync_log = file_data.setdefault('sync_log', dict())
            sync_log[self.uuid] = file_data['last_update']

            changes.add(relative_path)
        elif int(os.path.getmtime(file_path)) > file_data['last_update']:
            # Check if file is actually changed or the system time is off
            file_hash = bintools.hash_file(file_path)

            if file_data['hash'] != file_hash:
                # File modified locally (since last sync)
                file_data['last_update'] = int(os.path.getmtime(file_path))
                file_data['hash'] = file_hash
                file_data['last_update_location'] = self.uuid

                sync_log = file_data.setdefault('sync_log', dict())
                sync_log[self.uuid] = file_data['last_update']

                changes.add(relative_path)

        # The file exists on disk again: clear a stale 'deleted' flag and
        # re-hash the (recreated) content.
        if 'deleted' in file_data:
            file_data['last_update'] = datetime.now().timestamp()
            file_data['hash'] = bintools.hash_file(file_path)
            file_data['last_update_location'] = self.uuid

            sync_log = file_data.setdefault('sync_log', dict())
            sync_log[self.uuid] = file_data['last_update']

            changes.add(relative_path)

            del file_data['deleted']

        if 'not_found' in file_data:
            del file_data['not_found']

    def diff(self, remote_index):
        return IndexDiff.diff(self._index, remote_index)

    def finalize_transfer(self, transfer):
        """Apply a completed transfer to the index (and, for incoming
        transfers, to the file system), then persist the index."""
        if transfer.type == syncall.transfers.FileTransfer.TO_REMOTE:
            self.__finalize_transfer_to_remote(transfer)
        else:
            self.__finalize_transfer_from_remote(transfer)

        self.save_index()

    def __finalize_transfer_to_remote(self, transfer):
        with self.fs_access_lock:
            self.__update_index_after_transfer(
                transfer.file_name,
                self._get_index_unsafe(transfer.file_name),
                transfer.get_remote_uuid(),
                transfer.timestamp)

        self.index_updated.notify({transfer.file_name})

    def __finalize_transfer_from_remote(self, transfer):
        updated = False

        with self.fs_access_lock:
            diff = IndexDiff.compare_file(
                transfer.remote_file_data,
                self._get_index_unsafe(transfer.file_name))

            if diff == NEEDS_UPDATE:
                if 'deleted' in transfer.remote_file_data and \
                        transfer.remote_file_data['deleted']:
                    try:
                        os.remove(self.get_file_path(transfer.file_name))
                    except OSError:
                        # Fix: was a bare `except:`; file may be missing.
                        pass
                else:
                    try:
                        os.makedirs(os.path.dirname(
                            self.get_file_path(transfer.file_name)))
                    except OSError:
                        # Fix: was a bare `except:`; the directory usually
                        # already exists (FileExistsError is an OSError).
                        pass

                    # Update the actual file
                    shutil.move(
                        transfer.get_temp_path(),
                        self.get_file_path(transfer.file_name))

                # Update the file index
                self.__update_index_after_transfer(
                    transfer.file_name,
                    transfer.remote_file_data,
                    transfer.messanger.my_uuid,
                    transfer.timestamp)

                updated = True
            else:
                self.logger.debug(
                    "Skipping update of outdated file {} from {}".format(
                        transfer.file_name, transfer.get_remote_uuid()))

        if updated:
            self.index_updated.notify({transfer.file_name})

    def __update_index_after_transfer(self, file_name, file_index, uuid,
                                      time):
        # Caller must hold fs_access_lock.
        file_index['sync_log'][uuid] = time
        self._index[file_name] = file_index
        self.last_update = datetime.now().timestamp()

        self.transfer_finalized.notify((uuid, file_name, file_index))
class FileTransfer(threading.Thread):
    """
    Carries out one file sync (content update or delete) with a remote
    peer over a dedicated messanger connection.

    Uses an rsync-style delta: the receiver provides block checksums and
    the sender streams only the changed blocks.  The complete message
    sequence is documented in `__packet_received`.
    """

    # Transfer types
    FROM_REMOTE = 0
    TO_REMOTE = 1

    # Message types
    MSG_INIT = 0
    MSG_INIT_ACCEPT = 1
    MSG_CANCEL = 2
    MSG_BLOCK_DATA = 3
    MSG_DONE = 4
    MSG_DONE_ACCEPT = 5

    def __init__(self, directory, messanger, file_name=None,
                 block_size=4098):
        # directory: local Directory (index + file access).
        # messanger: connected messanger dedicated to this transfer.
        # file_name: given => we are the sender (TO_REMOTE);
        #            None  => incoming transfer (FROM_REMOTE).
        # block_size: rsync block size in bytes.
        #   NOTE(review): 4098 looks like a typo for 4096 — confirm
        #   before changing, both peers must agree on the size in use.
        super().__init__()
        self.logger = logging.getLogger(__name__)

        if file_name is None:
            self.type = self.FROM_REMOTE
        else:
            self.type = self.TO_REMOTE

        self.directory = directory
        self.messanger = messanger
        # Sync timestamp; set locally on MSG_DONE (receiver) or taken
        # from MSG_DONE_ACCEPT (sender).
        self.timestamp = None

        self.file_name = file_name
        if file_name is not None:
            self.file_data = self.directory.get_index(self.file_name)
        else:
            self.file_data = None

        self.block_size = block_size
        self.remote_file_data = None
        self.remote_checksums = None

        self.messanger.packet_received += self.__packet_received
        self.messanger.disconnected += self.__disconnected

        # Internal state flags.  Note these are name-mangled and distinct
        # from the public `transfer_*` Event attributes below.
        self.__transfer_started = False
        self.__transfer_completed = False
        self.__transfer_cancelled = False

        # Receiver-side handles: temp file being written, original file
        # (patch base) being read.
        self.__temp_file_name = None
        self.__temp_file_handle = None
        self.__file_handle = None

        # Public events other components subscribe to.
        self.transfer_started = Event()
        self.transfer_completed = Event()
        self.transfer_failed = Event()
        self.transfer_cancelled = Event()

    def get_temp_path(self):
        # Path of the temp file the incoming data is patched into.
        return self.__temp_file_name

    def initialize(self):
        # Begin reading packets from the remote side.
        self.messanger.start_receiving()

    def is_done(self):
        return self.__transfer_cancelled or self.__transfer_completed

    def has_started(self):
        return self.__transfer_started

    def get_remote_uuid(self):
        return self.messanger.remote_uuid

    def shutdown(self):
        # Cancel from our side: notify listeners, tell the remote,
        # then drop the connection and all held resources.
        self.__transfer_cancelled = True
        self.transfer_cancelled.notify(self)

        self.messanger.send({
            "type": self.MSG_CANCEL
        })
        self.messanger.disconnect()

        self.__release_resources()

    def terminate(self):
        # Tear down without sending MSG_CANCEL (remote initiated the end).
        self.messanger.disconnect()
        self.__release_resources()

    def __release_resources(self):
        # Close any open handles and free the temp file.  Idempotent.
        if self.__temp_file_handle is not None:
            self.__temp_file_handle.close()
            self.__temp_file_handle = None

        if self.__file_handle is not None:
            self.__file_handle.close()
            self.__file_handle = None

        if self.__temp_file_name is not None:
            self.directory.release_temp_file(self.__temp_file_name)
            self.__temp_file_name = None

    def start(self):
        """
        Transfer a file to the remote end.

        Do not call this if a transfer request should be handled.
        """
        if self.type != self.TO_REMOTE:
            raise ValueError("Transfer was not created as TO_REMOTE type")

        self.__transfer_started = True
        self.transfer_started.notify(self)

        self.messanger.send({
            "type": self.MSG_INIT,
            "name": self.file_name,
            "data": self.file_data
        })

    def __transfer_file(self, remote_checksums, block_size):
        # Adopt the receiver's block size and checksums, then stream the
        # delta on this Thread (threading.Thread.start -> run()).
        self.logger.debug(
            "Started transferring file {} to remote {}"
            .format(self.file_name, self.messanger.address[0])
        )

        self.block_size = block_size
        self.remote_checksums = remote_checksums

        super().start()

    def run(self):
        """ Send the delta data to the remote side. """
        try:
            with open(self.directory.get_file_path(self.file_name), 'rb') \
                    as file:
                delta_generator = pyrsync2.rsyncdelta(
                    file,
                    self.remote_checksums,
                    blocksize=self.block_size,
                    max_buffer=self.block_size
                )

                # Actual transfer of data
                for block in delta_generator:
                    self.messanger.send({
                        "type": self.MSG_BLOCK_DATA,
                        "binary_data": block
                    })
        except Exception as ex:
            self.logger.exception(ex)
            self.logger.error(
                "File {} couldn't be read transferred to {}. Maybe it changed."
                .format(self.file_name, self.messanger.address[0])
            )
            self.shutdown()
        else:
            self.messanger.send({
                "type": self.MSG_DONE
            })

    def is_delete(self):
        # Whether this transfer propagates a deletion rather than content.
        if self.type == self.TO_REMOTE:
            return 'deleted' in self.file_data and self.file_data['deleted']
        else:
            return 'deleted' in self.remote_file_data and \
                self.remote_file_data['deleted']

    def __accept_file(self, file_name, file_data):
        """
        Make sure the file needs to be transferred and accept it
        if it does.
        """
        file_status = syncall.IndexDiff.compare_file(
            file_data,
            self.directory.get_index().get(file_name, None)
        )

        if file_status == syncall.index.NEEDS_UPDATE:
            self.file_name = file_name
            self.file_data = self.directory.get_index(self.file_name)
            self.remote_file_data = file_data

            if not self.is_delete():
                # Incoming blocks are patched into a temp file; the
                # current file (or an empty stream when it doesn't exist
                # yet) serves as the patch base.
                self.__temp_file_name = self.directory.get_temp_path(
                    self.file_name
                )
                self.__temp_file_handle = open(self.__temp_file_name, 'wb')

                if os.path.exists(
                    self.directory.get_file_path(self.file_name)
                ):
                    self.__file_handle = open(
                        self.directory.get_file_path(self.file_name),
                        'rb'
                    )
                else:
                    self.__file_handle = BytesIO()

            self.__transfer_started = True
            self.transfer_started.notify(self)

            if self.is_delete():
                # Deletes need no checksums — nothing will be streamed.
                self.messanger.send({
                    "type": self.MSG_INIT_ACCEPT
                })
                self.logger.debug(
                    "Accepted a file delete request for {} from {}"
                    .format(file_name, self.messanger.address[0])
                )
            else:
                self.messanger.send({
                    "type": self.MSG_INIT_ACCEPT,
                    "block_size": self.block_size,
                    "checksums": self.directory.get_block_checksums(
                        self.file_name,
                        self.block_size
                    )
                })
                self.logger.debug(
                    "Accepted a file transfer request for {} from {}"
                    .format(file_name, self.messanger.address[0])
                )
        else:
            self.logger.error(
                "File transfer requested for {} from {} shouldn't be updated"
                .format(file_name, self.messanger.address[0])
            )
            self.shutdown()

    def __packet_received(self, data):
        """
        Message sequence should be:

        1. MSG_INIT | sender -> receiver
            - Contains file_name and file_data (index data)
        2. MSG_INIT_ACCEPT or MSG_CANCEL | receiver -> sender
            - Contains block_size and block checksums
        3. Multiple MSG_BLOCK_DATA | sender -> receiver
            - Contains the delta data for each block, in sequence
        4. MSG_DONE | sender -> receiver
            - No other data is going to be transfered
              (no more MSG_BLOCK_DATA)
        5. MSG_DONE_ACCEPT | receiver -> sender
            - The receiver successfuly received and processed the data
              and the file index for the file should be updated on both
              ends to reflect the sync time.
            - Contains `time` field with the current timestamp on the
              receiver machine. It's used to update both indexes to
              handle time offsets between the two machines.
            - The sender should close the connection after receiving
              this packet.

        If the transfer is supposed to delete a file then step 3 is
        skipped and the sender should send MSG_DONE immedeately after
        MSG_INIT_ACCEPT. The file itself should be deleted on the
        receiver after the MSG_DONE message and MSG_DONE_ACCEPT is sent
        if the delete is successful.

        MSG_CANCEL can be sent at any time from the receiver or the
        sender and the one that receives it should close the connection.

        If no MSG_CANCEL or MSG_DONE_ACCEPT message is received then the
        connection is regarded as closed unexpectedly and the transfer
        is considered failed.
        """
        if data['type'] == self.MSG_INIT:
            self.__accept_file(data['name'], data['data'])

        elif data['type'] == self.MSG_INIT_ACCEPT:
            if self.is_delete():
                # Nothing to stream for a delete; jump straight to DONE.
                self.logger.debug(
                    "Transferring delete of {} to {}"
                    .format(self.file_name, self.messanger.address[0])
                )
                self.messanger.send({
                    "type": self.MSG_DONE
                })
            else:
                self.__transfer_file(data['checksums'], data['block_size'])

        elif data['type'] == self.MSG_CANCEL:
            self.__transfer_cancelled = True
            self.terminate()
            self.transfer_cancelled.notify(self)

        elif data['type'] == self.MSG_BLOCK_DATA:
            if not self.__transfer_started:
                # Protocol violation: data before MSG_INIT was accepted.
                self.logger.error(
                    "Received data from {} for {}, but transfer not started"
                    .format(self.messanger.address[0], self.file_name)
                )
                self.terminate()
                return

            self.__data_received(data['binary_data'])

        elif data['type'] == self.MSG_DONE:
            self.__complete_transfer()

        elif data['type'] == self.MSG_DONE_ACCEPT:
            # Adopt the receiver's timestamp so both indexes agree.
            self.__transfer_completed = True
            self.timestamp = data['time']
            self.terminate()
            self.transfer_completed.notify(self)

        else:
            self.logger.error("Unknown packet from {}: {}".format(
                self.messanger.address[0],
                data['type']
            ))

    def __data_received(self, block):
        # Apply one delta block: read from the original file handle,
        # write the patched output into the temp file.
        try:
            pyrsync2.patchstream_block(
                self.__file_handle,
                self.__temp_file_handle,
                block,
                blocksize=self.block_size
            )
        except Exception as ex:
            self.logger.exception(ex)
            self.logger.error(
                "Block couldn't be applied to temp file of {}. Remote: {}"
                .format(self.file_name, self.messanger.address[0])
            )
            self.shutdown()

    def __complete_transfer(self):
        # Receiver side: all data arrived.  Flush handles, mark complete
        # and acknowledge with our local timestamp.
        self.timestamp = int(datetime.now().timestamp())

        if not self.is_delete():
            # Flush the file contents
            self.__file_handle.close()
            self.__file_handle = None
            self.__temp_file_handle.close()
            self.__temp_file_handle = None

        # Remote side should disconnect after MSG_DONE_ACCEPT
        self.__transfer_completed = True
        self.transfer_completed.notify(self)

        self.messanger.send({
            'type': self.MSG_DONE_ACCEPT,
            'time': self.timestamp
        })

    def __disconnected(self, data):
        # Connection dropped: if the transfer neither completed nor was
        # cancelled deliberately, report it as failed.
        self.__release_resources()

        if not self.__transfer_cancelled and not self.__transfer_completed:
            self.transfer_failed.notify(self)
class RemoteStore:
    """ Manages communication to a single remote SyncAll instance. """

    def __init__(self, messanger, directory):
        """
        Args:
            messanger: Connected messanger to the remote instance.
            directory: Local Directory being synchronized.
        """
        self.logger = logging.getLogger(__name__)

        self.messanger = messanger
        self.directory = directory
        self.directory.transfer_finalized += self.__transfer_finalized
        # Directory.last_update value at the time we last sent our index.
        self.my_index_last_updated = 0

        # Our copy of the remote's index; None until the first index
        # packet arrives.
        self.remote_index = None
        self.address = self.messanger.address[0]
        self.my_uuid = self.messanger.my_uuid
        self.uuid = self.messanger.remote_uuid

        self.disconnected = Event()
        self.messanger.disconnected += self.__disconnected
        self.messanger.packet_received += self._packet_received

    def __transfer_finalized(self, data):
        # Keep our copy of the remote index current after a finished
        # transfer that involved this remote.
        remote_uuid, file_name, file_data = data

        if self.uuid != remote_uuid:
            return

        if self.remote_index is not None:
            self.remote_index[file_name] = file_data

    def request_transfer(self, transfer_messanger):
        # Pass the transfer request to the transfer manager
        self.directory.transfer_manager.process_transfer(
            self,
            transfer_messanger
        )

    def index_received(self):
        return self.remote_index is not None

    def start_receiving(self):
        self.messanger.start_receiving()
        self.send_index(request=False)

    def send_index(self, request=True, force=False):
        """Send our full index, or MSG_INDEX_NO_CHANGE when nothing has
        changed since the last send; optionally request the remote's
        index in return."""
        if not force and \
                self.my_index_last_updated == \
                self.directory.get_last_update():
            # Nothing to do here, index is already up-to-date
            self.messanger.send({
                'type': MSG_INDEX_NO_CHANGE
            })
            return

        self.my_index_last_updated = self.directory.get_last_update()

        self.messanger.send({
            'type': MSG_INDEX,
            'index': self.directory.get_index()
        })

        if request:
            self.messanger.send({
                'type': MSG_REQUEST_INDEX
            })

    def send_index_delta(self, changes, request=True):
        """
        Send only the changed files (`changes`) index data to the remote.

        Use ONLY when ALL changed files are sent this way.
        """
        self.my_index_last_updated = self.directory.get_last_update()
        index = self.directory.get_index()

        if self.remote_index is not None:
            # Drop entries the remote already holds identical copies of.
            for file_name in list(changes):
                if file_name in self.remote_index and \
                        index[file_name] == self.remote_index[file_name]:
                    changes.remove(file_name)

        if len(changes) == 0:
            return

        self.messanger.send({
            'type': MSG_INDEX_DELTA,
            'index': {file_name: index[file_name] for file_name in changes}
        })

        if request:
            self.messanger.send({
                'type': MSG_REQUEST_INDEX
            })

    def __disconnected(self, no_data):
        self.directory.transfer_manager.remote_disconnect(self)
        self.disconnected.notify(self)

    def disconnect(self):
        self.messanger.disconnect()

    def _packet_received(self, packet):
        """Dispatch one decoded packet from the remote."""
        if 'type' not in packet:
            self.logger.error("Received packet with no type from {}".format(
                self.address
            ))
            return

        if packet['type'] == MSG_INDEX:
            self.remote_index = packet['index']
            self.__remote_index_updated()

        elif packet['type'] == MSG_INDEX_DELTA:
            updates = False

            if self.remote_index is None:
                # Fix: a delta can arrive before any full index; start
                # from an empty index instead of crashing with
                # `None[file_name] = ...` below.
                self.remote_index = dict()

            for file_name, file_data in packet['index'].items():
                if file_name not in self.remote_index or \
                        self.remote_index[file_name] != file_data:
                    updates = True
                    self.remote_index[file_name] = file_data

            if updates:
                self.__remote_index_updated()

        elif packet['type'] == MSG_REQUEST_INDEX:
            self.send_index(request=False)

        elif packet['type'] == MSG_INDEX_NO_CHANGE:
            self.__remote_index_updated()

        else:
            self.logger.error("Unknown packet from {}: {}".format(
                self.address,
                packet['type']
            ))

    def __remote_index_updated(self):
        # Compare local vs remote index and schedule the transfers that
        # bring both sides in sync.
        diff = self.directory.diff(self.remote_index)

        if diff[2]:
            self.logger.debug(
                "File conflicts with {}: {}"
                .format(self.uuid, diff[2])
            )
            # TODO: Handle conflicted files

        self.directory.transfer_manager.sync_files(self, diff[0])
        self.directory.transfer_manager.sync_files(self, diff[1])
class Messanger(Thread):
    """ Delivers and receives packets to/from remote instances using TCP. """

    # Max bytes fetched per recv() call.
    BUFFER_SIZE = 1024 * 1024
    # Seconds allowed for the initial TCP connect.
    CONNECT_TIMEOUT = 5

    def __init__(self, socket, address, my_uuid, remote_uuid):
        """
        Args:
            socket: Already-connected TCP socket (the parameter shadows
                the `socket` module inside this method only).
            address: Remote (host, port) tuple.
            my_uuid: UUID string of the local instance.
            remote_uuid: UUID string of the remote instance.
        """
        super().__init__()
        # self.daemon = True
        self.logger = logging.getLogger(__name__)

        self.packet_received = Event()
        self.disconnected = Event()
        self.address = address
        self.socket = socket
        self.my_uuid = my_uuid
        self.remote_uuid = remote_uuid

        # Streaming msgpack decoder, fed raw chunks as they arrive.
        self.__unpacker = msgpack.Unpacker()

    @staticmethod
    def connect(address, my_uuid, remote_uuid):
        """Open a TCP connection, send the raw 16-byte UUID handshake and
        return a Messanger wrapping the socket."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(Messanger.CONNECT_TIMEOUT)
        sock.connect(address)
        sock.sendall(uuid.UUID(my_uuid).bytes)
        # Back to blocking mode for the receive loop.
        sock.settimeout(None)

        return Messanger(sock, address, my_uuid, remote_uuid)

    def disconnect(self):
        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except OSError:
            # Fix: was a bare `except:`. Only socket errors are expected
            # (already closed / never connected).
            pass

    def start_receiving(self):
        self.start()

    def run(self):
        """Receive loop: feed chunks to the unpacker until the connection
        drops, then notify listeners and drop all handlers."""
        with self.socket:
            while True:
                try:
                    data = self.socket.recv(self.BUFFER_SIZE)
                except OSError:
                    # Fix: was a bare `except:`; recv failures are OSError.
                    break

                if not data:
                    # Remote closed the connection cleanly.
                    break

                processed = self.__handle_received_data(data)
                if not processed:
                    self.disconnect()
                    break

        self.disconnected.notify()
        # Prevent further callbacks and break reference cycles.
        self.disconnected.clear_handlers()
        self.packet_received.clear_handlers()

    def send(self, data):
        packet = msgpack.packb(data)

        try:
            # Fix: sendall() instead of send(). send() may transmit only a
            # prefix of the packet, which would corrupt the msgpack stream
            # for the remote side.
            self.socket.sendall(packet)
        except Exception as ex:
            self.logger.error(
                "Couldn't send data to {}"
                .format(self.address[0])
            )
            self.logger.exception(ex)
            self.disconnect()

    def __handle_received_data(self, data):
        """Decode every complete packet in `data` and notify listeners.
        Return False when the stream is corrupt or a handler raised, so
        the caller disconnects."""
        self.__unpacker.feed(data)

        for packet in self.__unpacker:
            try:
                unpacked_packet = bintools.decode_object(
                    packet,
                    except_keys=('hash', 'binary_data', 'checksums')
                )
            except Exception as ex:
                self.logger.error(
                    "Error trying to decode strings to utf-8 in packet from {}"
                    .format(self.address[0])
                )
                self.logger.exception(ex)
                return False
            else:
                try:
                    self.packet_received.notify(unpacked_packet)
                except Exception as ex:
                    self.logger.error("Error processing packet from {}"
                                      .format(self.address[0]))
                    self.logger.exception(ex)
                    return False

        return True
class FileTransfer(threading.Thread):
    """
    One file transfer (content or delete) with a remote peer, using an
    rsync-style block delta.  See `__packet_received` for the protocol.
    """

    # Transfer types
    FROM_REMOTE = 0
    TO_REMOTE = 1

    # Message types
    MSG_INIT = 0
    MSG_INIT_ACCEPT = 1
    MSG_CANCEL = 2
    MSG_BLOCK_DATA = 3
    MSG_DONE = 4
    MSG_DONE_ACCEPT = 5

    def __init__(self, directory, messanger, file_name=None,
                 block_size=4098):
        # file_name given => sender side (TO_REMOTE); None => receiver
        # (FROM_REMOTE).  block_size: rsync block size in bytes
        # (NOTE(review): possibly a typo for 4096 — confirm).
        super().__init__()
        self.logger = logging.getLogger(__name__)

        if file_name is None:
            self.type = self.FROM_REMOTE
        else:
            self.type = self.TO_REMOTE

        self.directory = directory
        self.messanger = messanger
        # Agreed sync timestamp, set when the transfer finishes.
        self.timestamp = None

        self.file_name = file_name
        if file_name is not None:
            self.file_data = self.directory.get_index(self.file_name)
        else:
            self.file_data = None

        self.block_size = block_size
        self.remote_file_data = None
        self.remote_checksums = None

        self.messanger.packet_received += self.__packet_received
        self.messanger.disconnected += self.__disconnected

        # Name-mangled state flags — distinct from the `transfer_*`
        # Event attributes below.
        self.__transfer_started = False
        self.__transfer_completed = False
        self.__transfer_cancelled = False

        self.__temp_file_name = None
        self.__temp_file_handle = None
        self.__file_handle = None

        # Public notification events.
        self.transfer_started = Event()
        self.transfer_completed = Event()
        self.transfer_failed = Event()
        self.transfer_cancelled = Event()

    def get_temp_path(self):
        # Temp file receiving the patched content.
        return self.__temp_file_name

    def initialize(self):
        self.messanger.start_receiving()

    def is_done(self):
        return self.__transfer_cancelled or self.__transfer_completed

    def has_started(self):
        return self.__transfer_started

    def get_remote_uuid(self):
        return self.messanger.remote_uuid

    def shutdown(self):
        # Cancel from our side and inform the remote.
        self.__transfer_cancelled = True
        self.transfer_cancelled.notify(self)

        self.messanger.send({"type": self.MSG_CANCEL})
        self.messanger.disconnect()

        self.__release_resources()

    def terminate(self):
        # Tear down without sending MSG_CANCEL.
        self.messanger.disconnect()
        self.__release_resources()

    def __release_resources(self):
        # Close handles and free the temp file; safe to call repeatedly.
        if self.__temp_file_handle is not None:
            self.__temp_file_handle.close()
            self.__temp_file_handle = None

        if self.__file_handle is not None:
            self.__file_handle.close()
            self.__file_handle = None

        if self.__temp_file_name is not None:
            self.directory.release_temp_file(self.__temp_file_name)
            self.__temp_file_name = None

    def start(self):
        """
        Transfer a file to the remote end.

        Do not call this if a transfer request should be handled.
        """
        if self.type != self.TO_REMOTE:
            raise ValueError("Transfer was not created as TO_REMOTE type")

        self.__transfer_started = True
        self.transfer_started.notify(self)

        self.messanger.send({
            "type": self.MSG_INIT,
            "name": self.file_name,
            "data": self.file_data
        })

    def __transfer_file(self, remote_checksums, block_size):
        # Use the receiver's block size/checksums, then stream the delta
        # on this Thread (threading.Thread.start -> run()).
        self.logger.debug("Started transferring file {} to remote {}".format(
            self.file_name, self.messanger.address[0]))

        self.block_size = block_size
        self.remote_checksums = remote_checksums

        super().start()

    def run(self):
        """ Send the delta data to the remote side. """
        try:
            with open(self.directory.get_file_path(self.file_name), 'rb') \
                    as file:
                delta_generator = pyrsync2.rsyncdelta(
                    file,
                    self.remote_checksums,
                    blocksize=self.block_size,
                    max_buffer=self.block_size)

                # Actual transfer of data
                for block in delta_generator:
                    self.messanger.send({
                        "type": self.MSG_BLOCK_DATA,
                        "binary_data": block
                    })
        except Exception as ex:
            self.logger.exception(ex)
            self.logger.error(
                "File {} couldn't be read transferred to {}. Maybe it changed."
                .format(self.file_name, self.messanger.address[0]))
            self.shutdown()
        else:
            self.messanger.send({"type": self.MSG_DONE})

    def is_delete(self):
        # True when this transfer propagates a deletion.
        if self.type == self.TO_REMOTE:
            return 'deleted' in self.file_data and self.file_data['deleted']
        else:
            return 'deleted' in self.remote_file_data and \
                self.remote_file_data['deleted']

    def __accept_file(self, file_name, file_data):
        """
        Make sure the file needs to be transferred and accept it
        if it does.
        """
        file_status = syncall.IndexDiff.compare_file(
            file_data,
            self.directory.get_index().get(file_name, None))

        if file_status == syncall.index.NEEDS_UPDATE:
            self.file_name = file_name
            self.file_data = self.directory.get_index(self.file_name)
            self.remote_file_data = file_data

            if not self.is_delete():
                # Patch base is the current file (or an empty stream for
                # new files); output goes to a temp file.
                self.__temp_file_name = self.directory.get_temp_path(
                    self.file_name)
                self.__temp_file_handle = open(self.__temp_file_name, 'wb')

                if os.path.exists(self.directory.get_file_path(
                        self.file_name)):
                    self.__file_handle = open(
                        self.directory.get_file_path(self.file_name),
                        'rb')
                else:
                    self.__file_handle = BytesIO()

            self.__transfer_started = True
            self.transfer_started.notify(self)

            if self.is_delete():
                # No checksums needed — nothing will be streamed.
                self.messanger.send({"type": self.MSG_INIT_ACCEPT})
                self.logger.debug(
                    "Accepted a file delete request for {} from {}".format(
                        file_name, self.messanger.address[0]))
            else:
                self.messanger.send({
                    "type": self.MSG_INIT_ACCEPT,
                    "block_size": self.block_size,
                    "checksums":
                        self.directory.get_block_checksums(self.file_name,
                                                           self.block_size)
                })
                self.logger.debug(
                    "Accepted a file transfer request for {} from {}".format(
                        file_name, self.messanger.address[0]))
        else:
            self.logger.error(
                "File transfer requested for {} from {} shouldn't be updated".
                format(file_name, self.messanger.address[0]))
            self.shutdown()

    def __packet_received(self, data):
        """
        Message sequence should be:

        1. MSG_INIT | sender -> receiver
            - Contains file_name and file_data (index data)
        2. MSG_INIT_ACCEPT or MSG_CANCEL | receiver -> sender
            - Contains block_size and block checksums
        3. Multiple MSG_BLOCK_DATA | sender -> receiver
            - Contains the delta data for each block, in sequence
        4. MSG_DONE | sender -> receiver
            - No other data is going to be transfered
              (no more MSG_BLOCK_DATA)
        5. MSG_DONE_ACCEPT | receiver -> sender
            - The receiver successfuly received and processed the data
              and the file index for the file should be updated on both
              ends to reflect the sync time.
            - Contains `time` field with the current timestamp on the
              receiver machine. It's used to update both indexes to
              handle time offsets between the two machines.
            - The sender should close the connection after receiving
              this packet.

        If the transfer is supposed to delete a file then step 3 is
        skipped and the sender should send MSG_DONE immedeately after
        MSG_INIT_ACCEPT. The file itself should be deleted on the
        receiver after the MSG_DONE message and MSG_DONE_ACCEPT is sent
        if the delete is successful.

        MSG_CANCEL can be sent at any time from the receiver or the
        sender and the one that receives it should close the connection.

        If no MSG_CANCEL or MSG_DONE_ACCEPT message is received then the
        connection is regarded as closed unexpectedly and the transfer
        is considered failed.
        """
        if data['type'] == self.MSG_INIT:
            self.__accept_file(data['name'], data['data'])

        elif data['type'] == self.MSG_INIT_ACCEPT:
            if self.is_delete():
                # Deletes skip the block stream entirely.
                self.logger.debug("Transferring delete of {} to {}".format(
                    self.file_name, self.messanger.address[0]))
                self.messanger.send({"type": self.MSG_DONE})
            else:
                self.__transfer_file(data['checksums'], data['block_size'])

        elif data['type'] == self.MSG_CANCEL:
            self.__transfer_cancelled = True
            self.terminate()
            self.transfer_cancelled.notify(self)

        elif data['type'] == self.MSG_BLOCK_DATA:
            if not self.__transfer_started:
                # Protocol violation: block data before an accepted INIT.
                self.logger.error(
                    "Received data from {} for {}, but transfer not started".
                    format(self.messanger.address[0], self.file_name))
                self.terminate()
                return

            self.__data_received(data['binary_data'])

        elif data['type'] == self.MSG_DONE:
            self.__complete_transfer()

        elif data['type'] == self.MSG_DONE_ACCEPT:
            # Use the receiver's timestamp so both indexes agree.
            self.__transfer_completed = True
            self.timestamp = data['time']
            self.terminate()
            self.transfer_completed.notify(self)

        else:
            self.logger.error("Unknown packet from {}: {}".format(
                self.messanger.address[0], data['type']))

    def __data_received(self, block):
        # Apply one delta block onto the patch base, writing into the
        # temp file.
        try:
            pyrsync2.patchstream_block(self.__file_handle,
                                       self.__temp_file_handle,
                                       block,
                                       blocksize=self.block_size)
        except Exception as ex:
            self.logger.exception(ex)
            self.logger.error(
                "Block couldn't be applied to temp file of {}. Remote: {}".
                format(self.file_name, self.messanger.address[0]))
            self.shutdown()

    def __complete_transfer(self):
        # Receiver side: everything arrived.  Flush handles, record our
        # local timestamp and acknowledge.
        self.timestamp = int(datetime.now().timestamp())

        if not self.is_delete():
            # Flush the file contents
            self.__file_handle.close()
            self.__file_handle = None
            self.__temp_file_handle.close()
            self.__temp_file_handle = None

        # Remote side should disconnect after MSG_DONE_ACCEPT
        self.__transfer_completed = True
        self.transfer_completed.notify(self)

        self.messanger.send({
            'type': self.MSG_DONE_ACCEPT,
            'time': self.timestamp
        })

    def __disconnected(self, data):
        # Unexpected drop (no DONE_ACCEPT / CANCEL seen) => failure.
        self.__release_resources()

        if not self.__transfer_cancelled and not self.__transfer_completed:
            self.transfer_failed.notify(self)
class Directory:
    """
    Listens for file system changes in specific directory and applies
    changes from different sources.
    """

    # Files matching this pattern (the index file and temp files)
    # are never indexed or synchronized.
    IGNORE_PATTERNS = r'\.syncall_.*'

    def __init__(self, uuid, dir_path, index_name='.syncall_index',
                 load_index=True, temp_dir_name='.syncall_temp',
                 create_temp_dir=False):
        """
        Initialize a synchronized directory.

        uuid: this instance's UUID (recorded in the index sync log).
        dir_path: path of the directory to manage.
        index_name: file name of the persisted index inside dir_path.
        load_index: if True, load the index from disk; else start empty.
        temp_dir_name: directory (inside dir_path) for in-progress transfers.
        create_temp_dir: create the temp directory if it doesn't exist.
        """
        self.logger = logging.getLogger(__name__)

        self.uuid = uuid
        self.dir_path = dir_path
        self.index_name = index_name
        self.index_path = os.path.join(self.dir_path, self.index_name)
        self.temp_dir = os.path.join(self.dir_path, temp_dir_name)
        self.last_update = datetime.now().timestamp()

        if create_temp_dir and not os.path.exists(self.temp_dir):
            os.mkdir(self.temp_dir)

        # Guards self._index and the file system operations on dir_path
        self.fs_access_lock = threading.Lock()
        # Guards self.temp_files and temp file creation/removal
        self.temp_dir_lock = threading.Lock()
        self.temp_files = set()
        self.transfer_manager = syncall.TransferManager(self)

        self.index_updated = Event()
        # Contains tuple(uuid, file_name, file_index) as data
        self.transfer_finalized = Event()

        if load_index:
            self.load_index()
        else:
            self._index = dict()

    def get_last_update(self):
        """Return the timestamp of the last local index change."""
        return self.last_update

    def get_temp_path(self, proposed_name):
        """
        Return a path to a temp file that can be written to.

        Use `proposed_name` if it's available or modify it so it is.
        """
        proposed_name = os.path.basename(proposed_name)
        name = proposed_name
        file_suffix = 0

        with self.temp_dir_lock:
            while os.path.isfile(os.path.join(self.temp_dir, name)):
                file_suffix += 1
                name = "{}-{}".format(proposed_name, file_suffix)

            file_path = os.path.join(self.temp_dir, name)

            # Create the file to avoid possible race conditions
            # after the with block
            with open(file_path, 'a+'):
                pass

            self.temp_files.add(file_path)

        return file_path

    def release_temp_file(self, path):
        """
        Remove a temp file created using `get_temp_path`.

        Unknown paths are ignored. Removal is best-effort: an OSError
        (e.g. the file is already gone) is swallowed, but the path is
        forgotten either way.
        """
        # BUGFIX: hold temp_dir_lock (get_temp_path does) and drop the
        # path from the bookkeeping set so it doesn't grow forever;
        # narrow the old bare `except:` to OSError.
        with self.temp_dir_lock:
            if path in self.temp_files:
                try:
                    os.remove(path)
                except OSError:
                    # Best-effort cleanup; the file may already be gone
                    pass
                self.temp_files.discard(path)

    def clear_temp_dir(self):
        """Remove all temp files created using `get_temp_path`."""
        # Iterate over a copy: release_temp_file() mutates the set
        for path in list(self.temp_files):
            self.release_temp_file(path)

    def get_file_path(self, file_name):
        """Return the on-disk path of an index-relative file name."""
        return os.path.join(self.dir_path, file_name)

    def get_block_checksums(self, file_name, block_size):
        """
        Return the rsync block checksum list for a local file, or an
        empty list if the file is not indexed or is marked as deleted.
        """
        with self.fs_access_lock:
            if file_name not in self._index:
                return []

            file_data = self._get_index_unsafe(file_name)
            if 'deleted' in file_data and file_data['deleted']:
                return []

            with open(self.get_file_path(file_name), 'rb') as file:
                block_checksums = list(pyrsync2.blockchecksums(
                    file,
                    blocksize=block_size
                ))

        return block_checksums

    def load_index(self):
        """Load the file index from disk (empty index if none exists)."""
        with self.fs_access_lock:
            if os.path.isfile(self.index_path):
                with open(self.index_path, 'rb') as index_file:
                    index = msgpack.unpackb(index_file.read())

                # Decode the object to utf strings except the 'hash' values
                self._index = bintools.decode_object(
                    index,
                    except_keys=('hash',)
                )
            else:
                self._index = dict()

        self.last_update = datetime.now().timestamp()

    def get_index(self, file_name=None):
        """
        Return the whole index dict, a single file's entry, or None
        when `file_name` is given but unknown.
        """
        with self.fs_access_lock:
            return self._get_index_unsafe(file_name=file_name)

    def _get_index_unsafe(self, file_name=None):
        """Same as get_index(); the caller must hold fs_access_lock."""
        if file_name is None:
            return self._index
        elif file_name not in self._index:
            return None
        else:
            return self._index[file_name]

    def save_index(self):
        """Persist the index to disk, msgpack-encoded."""
        with self.fs_access_lock:
            index = msgpack.packb(self._index)
            with open(self.index_path, 'wb') as index_file:
                index_file.write(index)

    def update_index(self, save_index=True, force=False):
        """
        Update self._index (use the get_index() method to get it).
        Return True if index changed, False otherwise.

        The index structure is:
            <index> ::= {
                <file_name>: <file_info>,
                ...
            }
            <file_name> ::= file path relative to directory top
            <file_info> ::= {
                'sync_log': {
                    <remote_uuid (as string)>: <timestamp>,
                    ...
                },
                'last_update_location': <remote_uuid (or the local UUID) (str)>
                'last_update': <timestamp>,
                'hash': <md5 byte-string>,
                [optional 'deleted': (True|False)]
            }
            <timestamp> ::= Datetime in unix timestamp (seconds).
                Depends on the os time on the system on which the
                change happened.
        """
        changes = set()

        with self.fs_access_lock:
            # Flag every known file as missing; _update_file_index()
            # clears the flag for each file actually found on disk
            for file_data in self._index.values():
                file_data['not_found'] = True

            for dirpath, dirnames, filenames in os.walk(self.dir_path):
                for name in filenames:
                    file_path = pathext.normalize(os.path.join(dirpath, name))

                    if not re.search(self.IGNORE_PATTERNS, file_path):
                        self._update_file_index(file_path, changes)

            # Mark each deleted file with the current timestamp
            # and UUID to avoid conflicts and to propagate properly
            timestamp = datetime.now().timestamp()

            for file_name, file_data in self._index.items():
                if 'not_found' in file_data:
                    del file_data['not_found']

                    if 'deleted' in file_data and file_data['deleted']:
                        # File has been deleted some time ago...
                        continue

                    # File has been deleted now
                    file_data['deleted'] = True
                    file_data['last_update'] = timestamp
                    file_data['last_update_location'] = self.uuid
                    file_data['hash'] = b''

                    sync_log = file_data.setdefault('sync_log', dict())
                    sync_log[self.uuid] = timestamp

                    changes.add(file_name)

            if changes:
                self.last_update = datetime.now().timestamp()

        # save_index() acquires fs_access_lock itself, so it must run
        # outside the with block above
        if save_index and changes:
            self.save_index()

        if force:
            self.index_updated.notify(None)
        elif changes:
            self.index_updated.notify(changes)

        # BUGFIX: the docstring always promised a bool result, but
        # nothing was returned before
        return bool(changes)

    def _update_file_index(self, file_path, changes):
        """
        Refresh the index entry for one on-disk file, adding its
        relative path to `changes` if the entry was modified.
        Caller must hold fs_access_lock.
        """
        relative_path = pathext.normalize(
            os.path.relpath(file_path, self.dir_path)
        )
        file_data = self._index.setdefault(relative_path, dict())

        if not file_data:
            # New file
            file_hash = bintools.hash_file(file_path)

            file_data['last_update'] = int(os.path.getmtime(file_path))
            file_data['hash'] = file_hash
            file_data['last_update_location'] = self.uuid

            sync_log = file_data.setdefault('sync_log', dict())
            sync_log[self.uuid] = file_data['last_update']

            changes.add(relative_path)
        elif int(os.path.getmtime(file_path)) > file_data['last_update']:
            # Check if file is actually changed or the system time is off
            file_hash = bintools.hash_file(file_path)

            if file_data['hash'] != file_hash:
                # File modified locally (since last sync)
                file_data['last_update'] = int(os.path.getmtime(file_path))
                file_data['hash'] = file_hash
                file_data['last_update_location'] = self.uuid

                sync_log = file_data.setdefault('sync_log', dict())
                sync_log[self.uuid] = file_data['last_update']

                changes.add(relative_path)

        if 'deleted' in file_data:
            # The file exists on disk again: resurrect the entry
            file_data['last_update'] = datetime.now().timestamp()
            file_data['hash'] = bintools.hash_file(file_path)
            file_data['last_update_location'] = self.uuid

            sync_log = file_data.setdefault('sync_log', dict())
            sync_log[self.uuid] = file_data['last_update']

            changes.add(relative_path)
            del file_data['deleted']

        if 'not_found' in file_data:
            del file_data['not_found']

    def diff(self, remote_index):
        """Return the IndexDiff of the local index against a remote one."""
        return IndexDiff.diff(self._index, remote_index)

    def finalize_transfer(self, transfer):
        """
        Apply a completed transfer to the directory and persist the
        updated index.
        """
        if transfer.type == syncall.transfers.FileTransfer.TO_REMOTE:
            self.__finalize_transfer_to_remote(transfer)
        else:
            self.__finalize_transfer_from_remote(transfer)

        self.save_index()

    def __finalize_transfer_to_remote(self, transfer):
        """Record a finished outgoing transfer in the index sync log."""
        with self.fs_access_lock:
            self.__update_index_after_transfer(
                transfer.file_name,
                self._get_index_unsafe(transfer.file_name),
                transfer.get_remote_uuid(),
                transfer.timestamp
            )

        self.index_updated.notify({transfer.file_name})

    def __finalize_transfer_from_remote(self, transfer):
        """
        Apply a finished incoming transfer: delete or replace the local
        file (when the remote data is newer) and update the index.
        """
        updated = False

        with self.fs_access_lock:
            diff = IndexDiff.compare_file(
                transfer.remote_file_data,
                self._get_index_unsafe(transfer.file_name)
            )

            if diff == NEEDS_UPDATE:
                if 'deleted' in transfer.remote_file_data and \
                        transfer.remote_file_data['deleted']:
                    try:
                        os.remove(self.get_file_path(transfer.file_name))
                    except OSError:
                        # Already gone; nothing to delete
                        pass
                else:
                    try:
                        os.makedirs(
                            os.path.dirname(
                                self.get_file_path(transfer.file_name)
                            )
                        )
                    except OSError:
                        # Parent directory already exists
                        pass

                    # Update the actual file
                    shutil.move(
                        transfer.get_temp_path(),
                        self.get_file_path(transfer.file_name)
                    )

                # Update the file index
                self.__update_index_after_transfer(
                    transfer.file_name,
                    transfer.remote_file_data,
                    transfer.messanger.my_uuid,
                    transfer.timestamp
                )
                updated = True
            else:
                self.logger.debug(
                    "Skipping update of outdated file {} from {}"
                    .format(transfer.file_name, transfer.get_remote_uuid())
                )

        if updated:
            self.index_updated.notify({transfer.file_name})

    def __update_index_after_transfer(self, file_name, file_index, uuid,
                                      time):
        """
        Store `file_index` for `file_name`, stamp its sync log with the
        remote `uuid`/`time` and notify transfer_finalized listeners.
        Caller must hold fs_access_lock.
        """
        file_index['sync_log'][uuid] = time
        self._index[file_name] = file_index
        self.last_update = datetime.now().timestamp()

        self.transfer_finalized.notify((uuid, file_name, file_index))
class RemoteStore:
    """
    Manages communication to a single remote SyncAll instance.
    """

    def __init__(self, messanger, directory):
        """
        messanger: established messanger to the remote instance; its
            `address`, `my_uuid` and `remote_uuid` are cached here.
        directory: the local syncall Directory being synchronized.
        """
        self.logger = logging.getLogger(__name__)

        self.messanger = messanger
        self.directory = directory
        self.directory.transfer_finalized += self.__transfer_finalized

        # Timestamp of the local index revision last sent to the remote
        self.my_index_last_updated = 0
        # Latest known remote index; None until one is received
        self.remote_index = None
        self.address = self.messanger.address[0]
        self.my_uuid = self.messanger.my_uuid
        self.uuid = self.messanger.remote_uuid

        self.disconnected = Event()

        self.messanger.disconnected += self.__disconnected
        self.messanger.packet_received += self._packet_received

    def __transfer_finalized(self, data):
        """
        Keep the cached remote index in sync after a finished transfer
        involving this remote (data is (uuid, file_name, file_index)).
        """
        remote_uuid, file_name, file_data = data

        if self.uuid != remote_uuid:
            # Transfer was with a different remote; not our concern
            return

        if self.remote_index is not None:
            self.remote_index[file_name] = file_data

    def request_transfer(self, transfer_messanger):
        """Hand an incoming transfer connection to the transfer manager."""
        # Pass the transfer request to the transfer manager
        self.directory.transfer_manager.process_transfer(
            self,
            transfer_messanger
        )

    def index_received(self):
        """Return True once a remote index has been received."""
        return self.remote_index is not None

    def start_receiving(self):
        """Start the messanger's receive loop and send our index."""
        self.messanger.start_receiving()
        self.send_index(request=False)

    def send_index(self, request=True, force=False):
        """
        Send the full local index to the remote, or a no-change notice
        when nothing changed since the last send (unless `force`).
        With `request`, also ask the remote for its index.
        """
        if not force and \
                self.my_index_last_updated == \
                self.directory.get_last_update():
            # Nothing to do here, index is already up-to-date
            self.messanger.send({'type': MSG_INDEX_NO_CHANGE})
            return

        self.my_index_last_updated = self.directory.get_last_update()
        self.messanger.send({
            'type': MSG_INDEX,
            'index': self.directory.get_index()
        })

        if request:
            self.messanger.send({'type': MSG_REQUEST_INDEX})

    def send_index_delta(self, changes, request=True):
        """
        Send only the changed files (`changes`) index data to the remote.
        Use ONLY when ALL changed files are sent this way.
        """
        self.my_index_last_updated = self.directory.get_last_update()
        index = self.directory.get_index()

        if self.remote_index is not None:
            # Drop entries the remote already has; iterate a copy since
            # `changes` is mutated in the loop
            for file_name in list(changes):
                if file_name in self.remote_index and \
                        index[file_name] == self.remote_index[file_name]:
                    changes.remove(file_name)

        if len(changes) == 0:
            return

        self.messanger.send({
            'type': MSG_INDEX_DELTA,
            'index': {file_name: index[file_name] for file_name in changes}
        })

        if request:
            self.messanger.send({'type': MSG_REQUEST_INDEX})

    def __disconnected(self, no_data):
        """Propagate a messanger disconnect to manager and listeners."""
        self.directory.transfer_manager.remote_disconnect(self)
        self.disconnected.notify(self)

    def disconnect(self):
        """Close the connection to the remote."""
        self.messanger.disconnect()

    def _packet_received(self, packet):
        """Dispatch one received packet by its 'type' field."""
        if 'type' not in packet:
            self.logger.error("Received packet with no type from {}".format(
                self.address))
            return

        if packet['type'] == MSG_INDEX:
            self.remote_index = packet['index']
            self.__remote_index_updated()
        elif packet['type'] == MSG_INDEX_DELTA:
            if self.remote_index is None:
                # BUGFIX: a delta can arrive before any full index;
                # start from an empty index instead of crashing with
                # a TypeError on item assignment to None
                self.remote_index = dict()

            updates = False

            for file_name, file_data in packet['index'].items():
                if file_name not in self.remote_index or \
                        self.remote_index[file_name] != file_data:
                    updates = True
                    self.remote_index[file_name] = file_data

            if updates:
                self.__remote_index_updated()
        elif packet['type'] == MSG_REQUEST_INDEX:
            self.send_index(request=False)
        elif packet['type'] == MSG_INDEX_NO_CHANGE:
            self.__remote_index_updated()
        else:
            self.logger.error("Unknown packet from {}: {}".format(
                self.address, packet['type']))

    def __remote_index_updated(self):
        """Diff local vs remote index and schedule the needed transfers."""
        diff = self.directory.diff(self.remote_index)

        if diff[2]:
            self.logger.debug("File conflicts with {}: {}".format(
                self.uuid, diff[2]))
            # TODO: Handle conflicted files

        self.directory.transfer_manager.sync_files(self, diff[0])
        self.directory.transfer_manager.sync_files(self, diff[1])