class ContinuousDiffMerger(threading.Thread): """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them""" @pydio_profile def __init__(self, job_config, job_data_path): """ Initialize thread internals :param job_config: JobConfig instance :param job_data_path: Filesystem path where the job data are stored :return: """ threading.Thread.__init__(self) self.last_run = 0 self.configs_path = job_data_path self.job_config = job_config sqlite_files = [file for file in os.listdir(self.configs_path) if file.endswith(".sqlite")] for sqlite_file in sqlite_files: try: exists_and_correct = check_sqlite_file(os.path.join(self.configs_path, sqlite_file)) if exists_and_correct: logging.info("Structure and Integrity of SQLite file %s is intact " % str( os.path.join(self.configs_path, sqlite_file))) except DBCorruptedException as e: logging.debug("SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync" % (str(os.path.join(self.configs_path, sqlite_file)), e.message)) os.unlink(os.path.join(self.configs_path, sqlite_file)) self.update_sequences_file(0, 0) self.init_global_progress() self.basepath = job_config.directory self.ws_id = job_config.workspace self.sdk = PydioSdk( job_config.server, ws_id=self.ws_id, remote_folder=job_config.remote_folder, user_id=job_config.user_id, device_id=ConfigManager.Instance().get_device_id(), skip_ssl_verify=job_config.trust_ssl, proxies=ConfigManager.Instance().get_defined_proxies(), timeout=job_config.timeout ) self.system = SystemSdk(job_config.directory) self.remote_seq = 0 self.local_seq = 0 self.local_target_seq = 0 self.remote_target_seq = 0 self.local_seqs = [] self.remote_seqs = [] self.db_handler = LocalDbHandler(self.configs_path, job_config.directory) self.interrupt = False self.event_timer = 2 self.online_timer = job_config.online_timer self.offline_timer = 60 self.online_status = True self.job_status_running = True self.direction = job_config.direction self.event_logger = EventLogger(self.configs_path) self.processing_signals = {} self.current_tasks = [] self.event_handler = None self.watcher = None self.watcher_first_run = True # TODO: TO BE LOADED FROM CONFIG self.storage_watcher = job_config.label.startswith('LSYNC') self.marked_for_snapshot_pathes = [] self.processing = False # indicates whether changes are being processed dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START') if job_config.direction != 'down' or (self.job_config.direction == 'down' and self.job_config.solve != 'remote'): self.event_handler = SqlEventHandler(includes=job_config.filters['includes'], excludes=job_config.filters['excludes'], basepath=job_config.directory, job_data_path=self.configs_path) self.watcher = LocalWatcher(job_config.directory, self.configs_path, event_handler=self.event_handler) self.db_handler.check_lock_on_event_handler(self.event_handler) if os.path.exists(os.path.join(self.configs_path, "sequences")): try: with open(os.path.join(self.configs_path, "sequences"), "rb") as f: sequences = pickle.load(f) self.remote_seq = sequences['remote'] self.local_seq = sequences['local'] if self.event_handler: self.event_handler.last_seq_id = self.local_seq except Exception as e: logging.exception(e) # Wrong content, remove sequences file. os.unlink(os.path.join(self.configs_path, "sequences")) dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk) dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk) if self.job_config.frequency == 'manual': self.job_status_running = False self.logger = EventLogger(self.configs_path) # end init def update_sequences_file(self, local_seq, remote_seq): with open(os.path.join(self.configs_path, "sequences"), "wb") as f: pickle.dump(dict( local=local_seq, remote=remote_seq ), f) @pydio_profile def handle_transfer_callback_event(self, sender, change): self.processing_signals[change['target']] = change self.global_progress["queue_bytesize"] -= change['bytes_sent'] self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"]) @pydio_profile def handle_transfer_rate_event(self, sender, transfer_rate): """ Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value. :param sender:Any :param transfer_rate:float :return: """ if self.global_progress['last_transfer_rate'] > 0: self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0 else: self.global_progress['last_transfer_rate'] = float(transfer_rate) @pydio_profile def is_running(self): """ Whether the job is in Running state or not. :return:bool """ return self.job_status_running def init_global_progress(self): """ Initialize the internal progress data :return:None """ self.global_progress = { 'status_indexing' :0, 'queue_length' :0, 'queue_done' :0.0, 'queue_bytesize' :0, 'last_transfer_rate':-1, 'queue_start_time' :time.clock(), 'total_time' :0 } @pydio_profile def update_global_progress(self, compute_queue_size=True): """ Compute a dict representation with many indications about the current state of the queue :return: dict """ self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time'] if compute_queue_size: self.global_progress["queue_bytesize"] = self.compute_queue_bytesize() # compute an eta eta = -1 if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0: eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate'] elif self.global_progress['queue_done']: remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done'] eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done'] self.global_progress['eta'] = eta #logging.info(self.global_progress) return self.global_progress def get_global_progress(self): self.update_global_progress(compute_queue_size=False) return self.global_progress @pydio_profile def update_current_tasks(self, cursor=0, limit=5): """ Get a list of the current tasks :return: list() """ if not hasattr(self, 'current_store'): return [] self.current_tasks = self.current_store.list_changes(cursor, limit) for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] @pydio_profile def get_current_tasks(self): for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] return { 'total': self.global_progress['queue_length'], 'current': self.current_tasks } @pydio_profile def compute_queue_bytesize(self): """ Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue. :return:float """ if not hasattr(self, 'current_store'): return 0 total = 0 exclude_pathes = [] for task in self.processing_signals: if 'remaining_bytes' in task: total += float(task['remaining_bytes']) exclude_pathes.append('"' + task['target'] + '"') if len(exclude_pathes): where = "target IN (" + ','.join(exclude_pathes) + ")" return self.current_store.sum_sizes(where) else: return self.current_store.sum_sizes() @pydio_profile def start_now(self): """ Resume task (set it in running mode) and make sure the cycle starts now :return: """ self.last_run = 0 self.sdk.remove_interrupt() self.resume() def pause(self): """ Set the task in pause. The thread is still running, but the cycle does nothing. :return:None """ self.job_status_running = False self.sdk.set_interrupt() self.info(_('Job Paused'), toUser='******', channel='status') @pydio_profile def resume(self): """ Set the task out of pause mode. :return: """ self.job_status_running = True self.sdk.proxies = ConfigManager.Instance().get_defined_proxies() self.sdk.remove_interrupt() self.info(_('Job Started'), toUser='******', channel='status') def stop(self): """ Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop. :return: """ if self.watcher: logging.debug("Stopping watcher: %s" % self.watcher) self.watcher.stop() self.info(_('Job stopping'), toUser='******', channel='status') self.sdk.set_interrupt() self.interrupt = True def sleep_offline(self): """ Sleep the thread for a "long" time (offline time) :return: """ self.online_status = False self.last_run = time.time() time.sleep(self.event_timer) def sleep_online(self): """ Sleep the thread for a "short" time (online time) :return: """ self.online_status = True self.last_run = time.time() time.sleep(self.event_timer) def exit_loop_clean(self, logger): #self.marked_for_snapshot_pathes = [] self.current_store.close() self.init_global_progress() logger.log_state(_('Synchronized'), 'success') if self.job_config.frequency == 'manual': self.job_status_running = False self.sleep_offline() else: self.sleep_online() @pydio_profile def run(self): """ Start the thread """ very_first = False self.start_watcher() while not self.interrupt: try: # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq)) self.processing_signals = {} self.init_global_progress() if very_first: self.global_progress['status_indexing'] = 1 interval = int(time.time() - self.last_run) if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer): time.sleep(self.event_timer) continue if not self.job_status_running: logging.debug("self.online_timer: %s" % self.online_timer) self.logger.log_state(_('Status: Paused'), "sync") self.sleep_offline() continue if self.job_config.frequency == 'time': start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m'])) end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59) now = datetime.datetime.now().time() if not start_time < now < end_time: self.logger.log_state(_('Status: scheduled for %s') % str(start_time), "sync") self.sleep_offline() continue else: logging.info("Now triggering synchro as expected at time " + str(start_time)) if not self.system.check_basepath(): log = _('Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry') % self.offline_timer logging.error(log) self.logger.log_state(_('Cannot find local folder, did you disconnect a volume?'), "error") self.sleep_offline() continue # Before starting infinite loop, small check that remote folder still exists if not self.sdk.check_basepath(): # if it doesn't try to create it self.sdk.remote_folder = os.path.join("/", self.sdk.remote_folder) try: logging.info("Creating remote directory.") self.sdk.mkdir("") continue except Exception as e: logging.exception(e) if not self.sdk.check_basepath(): log = _('Cannot find remote folder, maybe it was renamed? Sync cannot start, please check the configuration.') logging.error(log) self.logger.log_state(log, 'error') self.sleep_offline() continue if self.watcher: for snap_path in self.marked_for_snapshot_pathes: logging.info('LOCAL SNAPSHOT : loading snapshot for directory %s' % snap_path) if self.interrupt or not self.job_status_running: raise InterruptException() self.watcher.check_from_snapshot(snap_path) self.marked_for_snapshot_pathes = [] writewait = .5 # To avoid reading events before they're written (db lock) wait for writing to finish while self.event_handler.locked: logging.info("Waiting for changes to be written before retrieving remote changes.") if writewait < 5: writewait += .5 time.sleep(writewait) # Load local and/or remote changes, depending on the direction self.current_store = SqliteChangeStore(self.configs_path + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'], self.job_config.poolsize, local_sdk=self.system, remote_sdk=self.sdk, job_config=self.job_config) self.current_store.open() try: if self.job_config.direction != 'up': logging.info( 'Loading remote changes with sequence {0:s} for job id {1:s}'.format(str(self.remote_seq), str(self.job_config.id))) if self.remote_seq == 0: self.logger.log_state(_('Gathering data from remote workspace, this can take a while...'), 'sync') very_first = True self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store) self.current_store.sync() else: self.remote_target_seq = 1 self.ping_remote() except RequestException as ce: logging.exception(ce) if not connection_helper.is_connected_to_internet(self.sdk.proxies): error = _('No Internet connection detected! Waiting for %s seconds to retry') % self.offline_timer else: error = _('Connection to server failed, server is probably down. Waiting %s seconds to retry') % self.offline_timer self.marked_for_snapshot_pathes = [] logging.error(error) self.logger.log_state(error, "wait") self.sleep_offline() continue except Exception as e: error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer) logging.exception(e) self.logger.log_state(_('Error while connecting to remote server (%s)') % e.message, "error") self.marked_for_snapshot_pathes = [] self.sleep_offline() continue self.online_status = True if not self.job_config.server_configs: self.job_config.server_configs = self.sdk.load_server_configs() self.sdk.set_server_configs(self.job_config.server_configs) if self.job_config.direction != 'down' or (self.job_config.direction == 'down' and self.job_config.solve != 'remote'): logging.info( 'Loading local changes with sequence {0:s} for job id {1:s}'.format(str(self.local_seq), str(self.job_config.id))) self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store) self.current_store.sync() else: self.local_target_seq = 1 if not connection_helper.internet_ok: connection_helper.is_connected_to_internet(self.sdk.proxies) changes_length = len(self.current_store) if not changes_length: self.processing = False logging.info('No changes detected in ' + self.job_config.id) self.update_min_seqs_from_store() self.exit_loop_clean(self.logger) very_first = False #logging.info("CheckSync of " + self.job_config.id) #self.db_handler.list_non_idle_nodes() if not self.watcher.isAlive() and not self.interrupt: logging.info("File watcher died, restarting...") self.watcher.stop() self.watcher = LocalWatcher(self.job_config.directory, self.configs_path, event_handler=self.event_handler) self.start_watcher() continue self.global_progress['status_indexing'] = 1 logging.info('Reducing changes for ' + self.job_config.id) self.logger.log_state(_('Merging changes between remote and local, please wait...'), 'sync') # We are updating the status to IDLE here for the nodes which has status as NEW # The reason is when we create a new sync on the existing folder, some of the files might # already be synchronized and we ignore those files while we Dedup changes and those files # remain untouched later. # So the flow of node status change will occur as follows # NEW (as soon as we create a new sync task) # | # IDLE (here, just before we reduce changes) # | # PENDING (those files/folders which remain after reducing changes and to be actually processed) # | # UP / DOWN / CONFLICT (corresponding the operation which occurs) # | # IDLE (The final state once upload/ download happens or once when the conflict is resolved) self.db_handler.update_bulk_node_status_as_idle() logging.debug('[CMERGER] Delete Copies ' + self.job_config.id) self.current_store.delete_copies() self.update_min_seqs_from_store() logging.debug('[CMERGER] Dedup changes ' + self.job_config.id) self.current_store.dedup_changes() self.update_min_seqs_from_store() if not self.storage_watcher or very_first: logging.debug('[CMERGER] Detect unnecessary changes ' + self.ws_id) self.logger.log_state(_('Detecting unecessary changes...'), 'sync') self.current_store.detect_unnecessary_changes() logging.debug('[CMERGER] Done detecting unnecessary changes') self.logger.log_state(_('Done detecting unecessary changes...'), 'sync') self.update_min_seqs_from_store() logging.debug('Clearing op and pruning folders moves ' + self.job_config.id) self.current_store.clear_operations_buffer() self.current_store.prune_folders_moves() self.update_min_seqs_from_store() logging.debug('Store conflicts ' + self.job_config.id) store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler) if store_conflicts: if self.job_config.solve == 'both': logging.info('Marking nodes SOLVED:KEEPBOTH') for row in self.db_handler.list_conflict_nodes(): self.db_handler.update_node_status(row['node_path'], 'SOLVED:KEEPBOTH') store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler) if store_conflicts: logging.info('Conflicts detected, cannot continue!') self.logger.log_state(_('Conflicts detected, cannot continue!'), 'error') self.current_store.close() self.sleep_offline() self.logger.log_notif(_('Conflicts detected, cannot continue!'), 'error') continue if self.job_config.direction == 'down' and self.job_config.solve != 'remote': self.current_store.remove_based_on_location('local') self.update_min_seqs_from_store() changes_length = len(self.current_store) if not changes_length: logging.info('No changes detected for ' + self.job_config.id) self.exit_loop_clean(self.logger) very_first = False continue self.current_store.update_pending_status(self.db_handler, self.local_seq) self.global_progress['status_indexing'] = 0 import change_processor self.global_progress['queue_length'] = changes_length logging.info('Processing %i changes' % changes_length) self.logger.log_state(_('Processing %i changes') % changes_length, "start") counter = [1] def processor_callback(change): try: if self.interrupt or not self.job_status_running: raise InterruptException() self.update_current_tasks() self.update_global_progress() Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk, self.db_handler, self.event_logger) proc.process_change() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep(0.1) if self.interrupt or not self.job_status_running: raise InterruptException() except ProcessException as pe: logging.error(pe.message) return False except InterruptException as i: raise i except PydioSdkDefaultException as p: raise p except Exception as ex: logging.exception(ex) return False return True def processor_callback2(change): try: if self.interrupt or not self.job_status_running: raise InterruptException() Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk, self.db_handler, self.event_logger) proc.process_change() if self.interrupt or not self.job_status_running: raise InterruptException() except PydioSdkException as pe: if pe.message.find("Original file") > -1: pe.code = 1404 raise pe except ProcessException as pe: logging.error(pe.message) return False except PydioSdkDefaultException as p: raise p except InterruptException as i: raise i return True try: if sys.platform.startswith('win'): self.marked_for_snapshot_pathes = list(set(self.current_store.find_modified_parents()) - set(self.marked_for_snapshot_pathes)) if not self.processing: self.processing = True for i in self.current_store.process_changes_with_callback(processor_callback, processor_callback2): if self.interrupt: raise InterruptException #logging.info("Updating seqs") self.current_store.process_pending_changes() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep(0.05) # Allow for changes to be noticeable in UI time.sleep(.5) self.current_store.process_pending_changes() self.update_min_seqs_from_store(success=True) self.update_current_tasks() self.update_global_progress() #logging.info("DONE WITH CHANGES") self.processing = False except InterruptException as iexc: pass self.logger.log_state(_('%i files modified') % self.global_progress['queue_done'], 'success') if self.global_progress['queue_done']: self.logger.log_notif(_('%i files modified') % self.global_progress['queue_done'], 'success') self.exit_loop_clean(self.logger) except PydioSdkDefaultException as re: logging.error(re.message) self.logger.log_state(re.message, 'error') except SSLError as rt: logging.error(rt.message) self.logger.log_state(_('An SSL error happened, please check the logs'), 'error') except ProxyError as rt: logging.error(rt.message) self.logger.log_state(_('A proxy error happened, please check the logs'), 'error') except TooManyRedirects as rt: logging.error(rt.message) self.logger.log_state(_('Connection error: too many redirects'), 'error') except ChunkedEncodingError as rt: logging.error(rt.message) self.logger.log_state(_('Chunked encoding error, please check the logs'), 'error') except ContentDecodingError as rt: logging.error(rt.message) self.logger.log_state(_('Content Decoding error, please check the logs'), 'error') except InvalidSchema as rt: logging.error(rt.message) self.logger.log_state(_('Http connection error: invalid schema.'), 'error') except InvalidURL as rt: logging.error(rt.message) self.logger.log_state(_('Http connection error: invalid URL.'), 'error') except Timeout as to: logging.error(to) self.logger.log_state(_('Connection timeout, will retry later.'), 'error') except RequestException as ree: logging.error(ree.message) self.logger.log_state(_('Cannot resolve domain!'), 'error') except Exception as e: if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')): logging.exception('Unexpected Error: %s' % e.message) self.logger.log_state(_('Unexpected Error: %s') % e.message, 'error') else: logging.exception(e) self.sleep_offline() logging.debug('Finished this cycle, waiting for %i seconds' % self.online_timer) very_first = False def start_watcher(self): if self.watcher: if self.watcher_first_run: def status_callback(status): self.logger.log_state(status, 'sync') self.init_global_progress() try: self.global_progress['status_indexing'] = 1 self.logger.log_state(_('Checking changes since last launch...'), "sync") very_first = True self.db_handler.update_bulk_node_status_as_idle() self.watcher.check_from_snapshot(state_callback=status_callback) except DBCorruptedException as e: self.stop() JobsLoader.Instance().clear_job_data(self.job_config.id) logging.error(e) return except Exception as e: logging.exception(e) self.interrupt = True self.logger.log_state(_('Oops, error while indexing the local folder. Pausing the task.'), 'error') logging.error(e) self.watcher_first_run = False self.watcher.start() @pydio_profile def update_min_seqs_from_store(self, success=False): self.local_seq = self.current_store.get_min_seq('local', success=success) if self.local_seq == -1: self.local_seq = self.local_target_seq self.remote_seq = self.current_store.get_min_seq('remote', success=success) if self.remote_seq == -1: self.remote_seq = self.remote_target_seq #logging.info('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq) self.update_sequences_file(self.local_seq, self.remote_seq) if self.event_handler: self.event_handler.last_seq_id = self.local_seq @pydio_profile def ping_remote(self): """ Simple stat of the remote server root, to know if it's reachable. :return:bool """ test = self.sdk.stat('/') return (test != False) def info(self, message, toUser=False, channel='sync'): logging.info(message) if toUser: dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message) @pydio_profile def load_remote_changes_in_store(self, seq_id, store): last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store) """try: self.sdk.websocket_send() except Exception as e: logging.exception(e)""" return last_seq
class ContinuousDiffMerger(threading.Thread): """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them""" def __init__(self, job_config, job_data_path): """ Initialize thread internals :param job_config: JobConfig instance :param job_data_path: Filesystem path where the job data are stored :return: """ threading.Thread.__init__(self) self.last_run = 0 self.configs_path = job_data_path self.job_config = job_config self.init_global_progress() self.basepath = job_config.directory self.ws_id = job_config.workspace self.sdk = PydioSdk( job_config.server, ws_id=self.ws_id, remote_folder=job_config.remote_folder, user_id=job_config.user_id, device_id=ConfigManager.Instance().get_device_id(), skip_ssl_verify=job_config.trust_ssl, proxies=ConfigManager.Instance().get_defined_proxies()) self.system = SystemSdk(job_config.directory) self.remote_seq = 0 self.local_seq = 0 self.local_target_seq = 0 self.remote_target_seq = 0 self.local_seqs = [] self.remote_seqs = [] self.db_handler = LocalDbHandler(self.configs_path, job_config.directory) self.interrupt = False self.event_timer = 2 self.online_timer = 10 self.offline_timer = 60 self.online_status = True self.job_status_running = True self.direction = job_config.direction self.event_logger = EventLogger(self.configs_path) self.processing_signals = {} self.current_tasks = [] self.event_handler = None self.watcher = None self.watcher_first_run = True # TODO: TO BE LOADED FROM CONFIG self.storage_watcher = job_config.label.startswith('LSYNC') self.marked_for_snapshot_pathes = [] dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START') if job_config.direction != 'down': self.event_handler = SqlEventHandler( includes=job_config.filters['includes'], excludes=job_config.filters['excludes'], basepath=job_config.directory, job_data_path=self.configs_path) self.watcher = LocalWatcher(job_config.directory, self.configs_path, event_handler=self.event_handler) self.db_handler.check_lock_on_event_handler(self.event_handler) if os.path.exists(self.configs_path + "/sequences"): try: sequences = pickle.load( open(self.configs_path + "/sequences", "rb")) self.remote_seq = sequences['remote'] self.local_seq = sequences['local'] if self.event_handler: self.event_handler.last_seq_id = self.local_seq except Exception: # Wrong content, remove sequences file. os.unlink(self.configs_path + "/sequences") dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk) dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk) if self.job_config.frequency == 'manual': self.job_status_running = False def handle_transfer_callback_event(self, sender, change): self.processing_signals[change['target']] = change self.global_progress["queue_bytesize"] -= change['bytes_sent'] self.global_progress["queue_done"] += float( change['bytes_sent']) / float(change["total_size"]) def handle_transfer_rate_event(self, sender, transfer_rate): """ Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value. :param sender:Any :param transfer_rate:float :return: """ if self.global_progress['last_transfer_rate'] > 0: self.global_progress['last_transfer_rate'] = ( float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0 else: self.global_progress['last_transfer_rate'] = float(transfer_rate) def is_running(self): """ Whether the job is in Running state or not. :return:bool """ return self.job_status_running def init_global_progress(self): """ Initialize the internal progress data :return:None """ self.global_progress = { 'status_indexing': 0, 'queue_length': 0, 'queue_done': 0.0, 'queue_bytesize': 0, 'last_transfer_rate': -1, 'queue_start_time': time.clock(), 'total_time': 0 } def update_global_progress(self, compute_queue_size=True): """ Compute a dict representation with many indications about the current state of the queue :return: dict """ self.global_progress['total_time'] = time.clock( ) - self.global_progress['queue_start_time'] if compute_queue_size: self.global_progress[ "queue_bytesize"] = self.compute_queue_bytesize() # compute an eta eta = -1 if self.global_progress[ 'last_transfer_rate'] > -1 and self.global_progress[ 'queue_bytesize'] > 0: eta = self.global_progress[ 'queue_bytesize'] / self.global_progress['last_transfer_rate'] elif self.global_progress['queue_done']: remaining_operations = self.global_progress[ 'queue_length'] - self.global_progress['queue_done'] eta = remaining_operations * self.global_progress[ 'total_time'] / self.global_progress['queue_done'] self.global_progress['eta'] = eta #logging.info(self.global_progress) return self.global_progress def get_global_progress(self): self.update_global_progress(compute_queue_size=False) return self.global_progress def update_current_tasks(self, cursor=0, limit=5): """ Get a list of the current tasks :return: list() """ if not hasattr(self, 'current_store'): return [] self.current_tasks = self.current_store.list_changes(cursor, limit) for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] def get_current_tasks(self): for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] return { 'total': self.global_progress['queue_length'], 'current': self.current_tasks } def compute_queue_bytesize(self): """ Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue. :return:float """ if not hasattr(self, 'current_store'): return 0 total = 0 exclude_pathes = [] for task in self.processing_signals: if 'remaining_bytes' in task: total += float(task['remaining_bytes']) exclude_pathes.append('"' + task['target'] + '"') where = '' if len(exclude_pathes): where = "target IN (" + ','.join(exclude_pathes) + ")" return self.current_store.sum_sizes(where) else: return self.current_store.sum_sizes() def start_now(self): """ Resume task (set it in running mode) and make sure the cycle starts now :return: """ self.last_run = 0 self.sdk.remove_interrupt() self.resume() def pause(self): """ Set the task in pause. The thread is still running, but the cycle does nothing. :return:None """ self.job_status_running = False self.sdk.set_interrupt() self.info(_('Job Paused'), toUser='******', channel='status') def resume(self): """ Set the task out of pause mode. :return: """ self.job_status_running = True self.sdk.remove_interrupt() self.info(_('Job Started'), toUser='******', channel='status') def stop(self): """ Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop. :return: """ if self.watcher: logging.debug("Stopping watcher: %s" % self.watcher) self.watcher.stop() self.info(_('Job stopping'), toUser='******', channel='status') self.sdk.set_interrupt() self.interrupt = True def sleep_offline(self): """ Sleep the thread for a "long" time (offline time) :return: """ self.online_status = False self.last_run = time.time() time.sleep(self.event_timer) def sleep_online(self): """ Sleep the thread for a "short" time (online time) :return: """ self.online_status = True self.last_run = time.time() time.sleep(self.event_timer) def exit_loop_clean(self, logger): self.marked_for_snapshot_pathes = [] self.current_store.close() self.init_global_progress() logger.log_state(_('Synchronized'), 'success') if self.job_config.frequency == 'manual': self.job_status_running = False self.sleep_offline() else: self.sleep_online() def run(self): """ Start the thread """ logger = EventLogger(self.configs_path) very_first = False if self.watcher: if self.watcher_first_run: def status_callback(status): logger.log_state(status, 'sync') self.init_global_progress() try: self.global_progress['status_indexing'] = 1 logger.log_state( _('Checking changes since last launch...'), "sync") very_first = True self.watcher.check_from_snapshot( state_callback=status_callback) except DBCorruptedException as e: self.stop() JobsLoader.Instance().clear_job_data(self.job_config.id) logging.error(e) return except Exception as e: self.interrupt = True logger.log_state( _('Oops, error while indexing the local folder. Pausing the task.' ), 'error') logging.error(e) self.watcher_first_run = False self.watcher.start() while not self.interrupt: try: # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq)) self.processing_signals = {} self.init_global_progress() if very_first: self.global_progress['status_indexing'] = 1 interval = int(time.time() - self.last_run) if (self.online_status and interval < self.online_timer) or ( not self.online_status and interval < self.offline_timer): time.sleep(self.event_timer) continue if not self.job_status_running: logging.debug("self.online_timer: %s" % self.online_timer) logger.log_state(_('Status: Paused'), "sync") self.sleep_offline() continue if self.job_config.frequency == 'time': start_time = datetime.time( int(self.job_config.start_time['h']), int(self.job_config.start_time['m'])) end_time = datetime.time( int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59) now = datetime.datetime.now().time() if not start_time < now < end_time: logger.log_state( _('Status: scheduled for %s') % str(start_time), "sync") self.sleep_offline() continue else: logging.info( "Now triggering synchro as expected at time " + str(start_time)) if not self.system.check_basepath(): log = _( 'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' ) % self.offline_timer logging.error(log) logger.log_state( _('Cannot find local folder, did you disconnect a volume?' ), "error") self.sleep_offline() continue if self.watcher: for snap_path in self.marked_for_snapshot_pathes: logging.info( 'LOCAL SNAPSHOT : loading snapshot for directory %s' % snap_path) if self.interrupt or not self.job_status_running: raise InterruptException() self.watcher.check_from_snapshot(snap_path) # Load local and/or remote changes, depending on the direction from pydio.job.change_stores import SqliteChangeStore self.current_store = SqliteChangeStore( self.configs_path + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes']) self.current_store.open() try: if self.job_config.direction != 'up': logging.info( 'Loading remote changes with sequence %s' % str(self.remote_seq)) if self.remote_seq == 0: logger.log_state( _('Gathering data from remote workspace, this can take a while...' ), 'sync') very_first = True self.remote_target_seq = self.load_remote_changes_in_store( self.remote_seq, self.current_store) self.current_store.sync() else: self.remote_target_seq = 1 self.ping_remote() except RequestException as ce: if not connection_helper.is_connected_to_internet(): error = _( 'No Internet connection detected! Waiting for %s seconds to retry' ) % self.offline_timer else: error = _( 'Connection to server failed, server is probably down. Waiting %s seconds to retry' ) % self.offline_timer self.marked_for_snapshot_pathes = [] logging.error(error) logger.log_state(error, "wait") self.sleep_offline() continue except Exception as e: error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % ( e.message, self.offline_timer) logging.error(error) logger.log_state( _('Error while connecting to remote server (%s)') % e.message, "error") self.marked_for_snapshot_pathes = [] self.sleep_offline() continue self.online_status = True if not self.job_config.server_configs: self.job_config.server_configs = self.sdk.load_server_configs( ) self.sdk.set_server_configs(self.job_config.server_configs) if self.job_config.direction != 'down': logging.info('Loading local changes with sequence ' + str(self.local_seq)) self.local_target_seq = self.db_handler.get_local_changes_as_stream( self.local_seq, self.current_store.flatten_and_store) self.current_store.sync() else: self.local_target_seq = 1 if not connection_helper.internet_ok: connection_helper.is_connected_to_internet() changes_length = len(self.current_store) if not changes_length: logging.info('No changes detected') self.update_min_seqs_from_store() self.exit_loop_clean(logger) very_first = False continue self.global_progress['status_indexing'] = 1 logging.info('Reducing changes') logger.log_state( _('Merging changes between remote and local, please wait...' ), 'sync') logging.debug('Delete Copies') self.current_store.delete_copies() self.update_min_seqs_from_store() logging.debug('Dedup changes') self.current_store.dedup_changes() self.update_min_seqs_from_store() if not self.storage_watcher or very_first: logging.debug('Detect unnecessary changes') self.current_store.detect_unnecessary_changes( local_sdk=self.system, remote_sdk=self.sdk) self.update_min_seqs_from_store() logging.debug('Clearing op and pruning folders moves') self.current_store.clear_operations_buffer() self.current_store.prune_folders_moves() self.update_min_seqs_from_store() logging.debug('Store conflicts') store_conflicts = self.current_store.clean_and_detect_conflicts( self.db_handler) if store_conflicts: logging.info('Conflicts detected, cannot continue!') logger.log_state(_('Conflicts detected, cannot continue!'), 'error') self.current_store.close() self.sleep_offline() continue changes_length = len(self.current_store) if not changes_length: logging.info('No changes detected') self.exit_loop_clean(logger) very_first = False continue self.global_progress['status_indexing'] = 0 import change_processor self.global_progress['queue_length'] = changes_length logging.info('Processing %i changes' % changes_length) logger.log_state( _('Processing %i changes') % changes_length, "start") counter = [1] def processor_callback(change): try: if self.interrupt or not self.job_status_running: raise InterruptException() self.update_current_tasks() self.update_global_progress() Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk, self.db_handler, self.event_logger) proc.process_change() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep(0.1) if self.interrupt or not self.job_status_running: raise InterruptException() except ProcessException as pe: logging.error(pe.message) return False except InterruptException as i: raise i except PydioSdkDefaultException as p: raise p except Exception as ex: logging.exception(ex.message) return False return True try: if sys.platform.startswith('win'): self.marked_for_snapshot_pathes = list( set(self.current_store.find_modified_parents()) - set(self.marked_for_snapshot_pathes)) self.current_store.process_changes_with_callback( processor_callback) except InterruptException as iexc: pass logger.log_state( _('%i files modified') % self.global_progress['queue_done'], 'success') if self.global_progress['queue_done']: logger.log_notif( _('%i files modified') % self.global_progress['queue_done'], 'success') except PydioSdkDefaultException as re: logging.error(re.message) logger.log_state(re.message, 'error') except SSLError as rt: logging.error(rt.message) logger.log_state( _('An SSL error happened, please check the logs'), 'error') except ProxyError as rt: logging.error(rt.message) logger.log_state( _('A proxy error happened, please check the logs'), 'error') except TooManyRedirects as rt: logging.error(rt.message) logger.log_state(_('Connection error: too many redirects'), 'error') except ChunkedEncodingError as rt: logging.error(rt.message) logger.log_state( _('Chunked encoding error, please check the logs'), 'error') except ContentDecodingError as rt: logging.error(rt.message) logger.log_state( _('Content Decoding error, please check the logs'), 'error') except InvalidSchema as rt: logging.error(rt.message) logger.log_state(_('Http connection error: invalid schema.'), 'error') except InvalidURL as rt: logging.error(rt.message) logger.log_state(_('Http connection error: invalid URL.'), 'error') except Timeout as to: logging.error(to) logger.log_state(_('Connection timeout, will retry later.'), 'error') except RequestException as ree: logging.error(ree.message) logger.log_state(_('Cannot resolve domain!'), 'error') except Exception as e: if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')): logging.exception('Unexpected Error: %s' % e.message) logger.log_state( _('Unexpected Error: %s') % e.message, 'error') logging.debug('Finished this cycle, waiting for %i seconds' % self.online_timer) self.exit_loop_clean(logger) very_first = False def update_min_seqs_from_store(self, success=False): self.local_seq = self.current_store.get_min_seq('local', success=success) if self.local_seq == -1: self.local_seq = self.local_target_seq self.remote_seq = self.current_store.get_min_seq('remote', success=success) if self.remote_seq == -1: self.remote_seq = self.remote_target_seq logging.debug('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq) pickle.dump(dict(local=self.local_seq, remote=self.remote_seq), open(self.configs_path + '/sequences', 'wb')) if self.event_handler: self.event_handler.last_seq_id = self.local_seq def ping_remote(self): """ Simple stat of the remote server root, to know if it's reachable. :return:bool """ test = self.sdk.stat('/') return (test != False) def info(self, message, toUser=False, channel='sync'): logging.info(message) if toUser: dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message) def load_remote_changes_in_store(self, seq_id, store): last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store) return last_seq
class ContinuousDiffMerger(threading.Thread): """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them""" def __init__(self, job_config, job_data_path): """ Initialize thread internals :param job_config: JobConfig instance :param job_data_path: Filesystem path where the job data are stored :return: """ threading.Thread.__init__(self) self.last_run = 0 self.data_base = job_data_path self.job_config = job_config self.init_global_progress() self.basepath = job_config.directory self.ws_id = job_config.workspace self.sdk = PydioSdk( job_config.server, ws_id=self.ws_id, remote_folder=job_config.remote_folder, user_id=job_config.user_id, device_id=ConfigManager.Instance().get_device_id(), skip_ssl_verify=job_config.trust_ssl ) self.system = SystemSdk(job_config.directory) self.remote_seq = 0 self.local_seq = 0 self.local_target_seq = 0 self.remote_target_seq = 0 self.local_seqs = [] self.remote_seqs = [] self.db_handler = LocalDbHandler(self.data_base, job_config.directory) self.interrupt = False self.event_timer = 2 self.online_timer = 10 self.offline_timer = 60 self.online_status = True self.job_status_running = True self.direction = job_config.direction self.event_logger = EventLogger(self.data_base) self.processing_signals = {} self.current_tasks = [] self.event_handler = None self.watcher = None self.watcher_first_run = True dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START') if job_config.direction != 'down': self.event_handler = SqlEventHandler(includes=job_config.filters['includes'], excludes=job_config.filters['excludes'], basepath=job_config.directory, job_data_path=job_data_path) self.watcher = LocalWatcher(job_config.directory, job_data_path, event_handler=self.event_handler) self.db_handler.check_lock_on_event_handler(self.event_handler) if os.path.exists(self.data_base + "/sequences"): try: sequences = pickle.load(open(self.data_base + "/sequences", "rb")) self.remote_seq = sequences['remote'] self.local_seq = sequences['local'] if self.event_handler: self.event_handler.last_seq_id = self.local_seq except Exception: # Wrong content, remove sequences file. os.unlink(self.data_base + "/sequences") dispatcher.connect( self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=dispatcher.Any ) dispatcher.connect( self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=dispatcher.Any ) if self.job_config.frequency == 'manual': self.job_status_running = False def handle_transfer_callback_event(self, sender, change): self.processing_signals[change['target']] = change self.global_progress["queue_bytesize"] -= change['bytes_sent'] self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"]) def handle_transfer_rate_event(self, sender, transfer_rate): """ Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value. :param sender:Any :param transfer_rate:float :return: """ if self.global_progress['last_transfer_rate'] > 0: self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0 else: self.global_progress['last_transfer_rate'] = float(transfer_rate) def is_running(self): """ Whether the job is in Running state or not. :return:bool """ return self.job_status_running def init_global_progress(self): """ Initialize the internal progress data :return:None """ self.global_progress = { 'queue_length' :0, 'queue_done' :0.0, 'queue_bytesize' :0, 'last_transfer_rate':-1, 'queue_start_time' :time.clock(), 'total_time' :0 } def update_global_progress(self): """ Compute a dict representation with many indications about the current state of the queue :return: dict """ self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time'] self.global_progress["queue_bytesize"] = self.compute_queue_bytesize() # compute an eta eta = -1 if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0 : eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate'] elif self.global_progress['queue_done']: remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done'] eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done'] self.global_progress['eta'] = eta #logging.info(self.global_progress) return self.global_progress def get_global_progress(self): return self.global_progress def update_current_tasks(self, cursor=0, limit=5): """ Get a list of the current tasks :return: list() """ if not hasattr(self, 'current_store'): return [] self.current_tasks = self.current_store.list_changes(cursor, limit) for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] def get_current_tasks(self): for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] return { 'total': self.global_progress['queue_length'], 'current': self.current_tasks } def compute_queue_bytesize(self): """ Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue. :return:float """ if not hasattr(self, 'current_store'): return 0 total = 0 exclude_pathes = [] for task in self.processing_signals: if 'remaining_bytes' in task: total += float(task['remaining_bytes']) exclude_pathes.append('"' + task['target'] + '"') where = '' if len(exclude_pathes): where = "target IN (" + ','.join(exclude_pathes) + ")" return self.current_store.sum_sizes(where) else: return self.current_store.sum_sizes() def start_now(self): """ Resume task (set it in running mode) and make sure the cycle starts now :return: """ self.last_run = 0 self.resume() def pause(self): """ Set the task in pause. The thread is still running, but the cycle does nothing. :return:None """ self.job_status_running = False self.info('Job Paused', toUser='******', channel='status') def resume(self): """ Set the task out of pause mode. :return: """ self.job_status_running = True self.info('Job Started', toUser='******', channel='status') def stop(self): """ Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop. :return: """ if hasattr(self, 'watcher'): logging.debug("Stopping watcher: %s" % self.watcher) self.watcher.stop() self.info('Job stopping', toUser='******', channel='status') self.interrupt = True def sleep_offline(self): """ Sleep the thread for a "long" time (offline time) :return: """ self.online_status = False self.last_run = time.time() time.sleep(self.event_timer) def sleep_online(self): """ Sleep the thread for a "short" time (online time) :return: """ self.online_status = True self.last_run = time.time() time.sleep(self.event_timer) def run(self): """ Start the thread """ logger = EventLogger(self.data_base) very_first = False if self.watcher: if self.watcher_first_run: logger.log_state('Checking changes since last launch...', "sync") very_first = True self.watcher.check_from_snapshot() self.watcher_first_run = False self.watcher.start() while not self.interrupt: try: # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq)) self.processing_signals = {} self.init_global_progress() interval = int(time.time() - self.last_run) if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer): time.sleep(self.event_timer) continue if not self.job_status_running: logging.debug("self.online_timer: %s" % self.online_timer) logger.log_state('Status: Paused', "sync") self.sleep_offline() continue if self.job_config.frequency == 'time': start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m'])) end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59) now = datetime.datetime.now().time() if not start_time < now < end_time: logger.log_state('Status: scheduled for %s' % str(start_time), "sync") self.sleep_offline() continue else: logging.info("Now triggering synchro as expected at time " + start_time) if not self.system.check_basepath(): log = 'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' % self.offline_timer logging.error(log) logger.log_state('Cannot find local folder, did you disconnect a volume?', "error") self.sleep_offline() continue # Load local and/or remote changes, depending on the direction from pydio.job.change_stores import SqliteChangeStore self.current_store = SqliteChangeStore(self.data_base + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes']) self.current_store.open() try: if self.job_config.direction != 'up': logging.info('Loading remote changes with sequence ' + str(self.remote_seq)) if self.remote_seq == 0: logger.log_state('Gathering data from remote workspace, this can take a while...', 'sync') very_first = True self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store) self.current_store.sync() else: self.remote_target_seq = 1 self.ping_remote() except ConnectionError as ce: error = 'No connection detected, waiting %s seconds to retry' % self.offline_timer logging.error(error) logger.log_state(error, "wait") self.sleep_offline() continue except Exception as e: error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer) logging.error(error) logger.log_state('Error while connecting to remote server (%s)' % e.message, "error") self.sleep_offline() continue self.online_status = True if not self.job_config.server_configs: self.job_config.server_configs = self.sdk.load_server_configs() self.sdk.set_server_configs(self.job_config.server_configs) if self.job_config.direction != 'down': logging.info('Loading local changes with sequence ' + str(self.local_seq)) self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store) self.current_store.sync() else: self.local_target_seq = 1 logging.info('Reducing changes') self.current_store.delete_copies() self.update_min_seqs_from_store() self.current_store.dedup_changes() self.update_min_seqs_from_store() self.current_store.detect_unnecessary_changes(local_sdk=self.system, remote_sdk=self.sdk) self.update_min_seqs_from_store() #self.current_store.filter_out_echoes_events() #self.update_min_seqs_from_store() self.current_store.clear_operations_buffer() self.current_store.prune_folders_moves() self.update_min_seqs_from_store() store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler) if store_conflicts: logging.info('Conflicts detected, cannot continue!') logger.log_state('Conflicts detected, cannot continue!', 'error') self.current_store.close() self.sleep_offline() continue changes_length = len(self.current_store) if changes_length: import change_processor self.global_progress['queue_length'] = changes_length logging.info('Processing %i changes' % changes_length) logger.log_state('Processing %i changes' % changes_length, "start") counter = [1] def processor_callback(change): try: if self.interrupt or not self.job_status_running: raise InterruptException() self.update_current_tasks() self.update_global_progress() proc = ChangeProcessor(change, self.current_store, self.job_config, self.system, self.sdk, self.db_handler, self.event_logger) proc.process_change() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep(0.1) if self.interrupt or not self.job_status_running: raise InterruptException() except ProcessException as pe: logging.error(pe.message) return False except InterruptException as i: raise i except PydioSdkDefaultException as p: raise p except Exception as ex: logging.exception(ex.message) return False return True try: self.current_store.process_changes_with_callback(processor_callback) except InterruptException as iexc: pass logger.log_state('%i files modified' % self.global_progress['queue_done'], "success") else: logging.info('No changes detected') if very_first: logger.log_state('Remote and local are synchronized', 'success') except PydioSdkDefaultException as re: logging.warning(re.message) logger.log_state(re.message, 'error') except Exception as e: if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')): logging.exception('Unexpected Error: %s' % e.message) logger.log_state('Unexpected Error: %s' % e.message, 'error') logging.info('Finished this cycle, waiting for %i seconds' % self.online_timer) self.current_store.close() self.init_global_progress() if self.job_config.frequency == 'manual': self.job_status_running = False self.sleep_offline() else: self.sleep_online() def update_min_seqs_from_store(self, success=False): self.local_seq = self.current_store.get_min_seq('local', success=success) if self.local_seq == -1: self.local_seq = self.local_target_seq self.remote_seq = self.current_store.get_min_seq('remote', success=success) if self.remote_seq == -1: self.remote_seq = self.remote_target_seq logging.debug('Storing sequences remote %i local %i', self.local_seq, self.remote_seq) pickle.dump(dict( local=self.local_seq, remote=self.remote_seq ), open(self.data_base + '/sequences', 'wb')) if self.event_handler: self.event_handler.last_seq_id = self.local_seq def ping_remote(self): """ Simple stat of the remote server root, to know if it's reachable. :return:bool """ test = self.sdk.stat('/') return (test != False) def info(self, message, toUser=False, channel='sync'): logging.info(message) if toUser: dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message) def load_remote_changes_in_store(self, seq_id, store): last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store) return last_seq
class ContinuousDiffMerger(threading.Thread): """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them""" def __init__(self, job_config, job_data_path, pub_socket=False): threading.Thread.__init__(self) self.data_base = job_data_path self.job_config = job_config self.progress = 0 self.basepath = job_config.directory self.ws_id = job_config.workspace self.sdk = PydioSdk( job_config.server, ws_id=self.ws_id, remote_folder=job_config.remote_folder, user_id=job_config.user_id ) self.system = SystemSdk(job_config.directory) self.remote_seq = 0 self.local_seq = 0 self.local_target_seq = 0 self.remote_target_seq = 0 self.local_seqs = [] self.remote_seqs = [] self.db_handler = LocalDbHandler(self.data_base, job_config.directory) self.interrupt = False self.online_timer = 10 self.offline_timer = 60 self.online_status = True self.job_status_running = True self.direction = job_config.direction if pub_socket: self.pub_socket = pub_socket self.info('Job Started', toUser='******', channel='status') if os.path.exists(self.data_base + "/sequences"): sequences = pickle.load(open(self.data_base + "/sequences", "rb")) self.remote_seq = sequences['remote'] self.local_seq = sequences['local'] if job_config.direction != 'down': self.watcher = LocalWatcher(job_config.directory, job_config.filters['includes'], job_config.filters['excludes'], job_data_path) dispatcher.connect( self.handle_progress_event, signal=PROGRESS_SIGNAL, sender=dispatcher.Any ) def handle_progress_event(self, sender, progress): self.info('Job progress is %s' % progress) def is_running(self): return self.job_status_running def pause(self): self.job_status_running = False self.info('Job Paused', toUser='******', channel='status') def resume(self): self.job_status_running = True self.info('Job Started', toUser='******', channel='status') def stop(self): if hasattr(self, 'watcher'): self.watcher.stop() self.interrupt = True def run(self): if hasattr(self, 'watcher'): self.watcher.start() while not self.interrupt: try: if not self.job_status_running: time.sleep(self.online_timer) continue if not self.system.check_basepath(): logging.info('Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' % self.offline_timer) time.sleep(self.offline_timer) continue # Load local and/or remote changes, depending on the direction local_changes = dict(data=dict(), path_to_seqs=dict()) remote_changes = dict(data=dict(), path_to_seqs=dict()) try: if self.job_config.direction != 'up': logging.info('Loading remote changes with sequence ' + str(self.remote_seq)) self.remote_target_seq = self.get_remote_changes(self.remote_seq, remote_changes) else: self.remote_target_seq = 1 self.ping_remote() except ConnectionError as ce: logging.info('No connection detected, waiting %s seconds to retry' % self.offline_timer) self.online_status = False time.sleep(self.offline_timer) continue except Exception as e: logging.info('Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer)) self.online_status = False time.sleep(self.offline_timer) continue self.online_status = True if self.job_config.direction != 'down': logging.info('Loading local changes with sequence ' + str(self.local_seq)) self.local_target_seq = self.db_handler.get_local_changes(self.local_seq, local_changes) else: self.local_target_seq = 1 self.local_seqs = local_changes['data'].keys() #map(lambda x:x['seq'], local_changes) self.remote_seqs = remote_changes['data'].keys() #map(lambda x:x['seq'], remote_changes) logging.info('Reducing changes') conflicts = [] changes = self.reduce_changes(local_changes, remote_changes, conflicts) if len(conflicts): logging.info('Conflicts detected, cannot continue!') self.store_conflicts(conflicts) self.job_status_running = False time.sleep(self.offline_timer) continue if len(changes): logging.info('Processing %i changes' % len(changes)) i = 1 for change in changes: try: self.process_change(change) self.remove_seq(change['seq'], change['location']) except ProcessException as pe: logging.error(pe.message) except OSError as e: logging.error(e.message) #progress_percent = 100 * i / len(changes) progress_percent = "{0:.2f}%".format(float(i)/len(changes) * 100) dispatcher.send(signal=PROGRESS_SIGNAL, sender=self, progress=progress_percent) #self.pub_socket.send_string("sync" + ' ' + str(i) + "/" + str(len(changes)) + " changes done : " + str(progressPercent) + "%") i += 1 if self.interrupt: break time.sleep(0.01) else: logging.info('No changes detected') except OSError as e: logging.error('Type Error! ') logging.info('Finished this cycle, waiting for %i seconds' % self.online_timer) time.sleep(self.online_timer) def remove_seq(self, seq_id, location): if location == 'local': self.local_seqs.remove(seq_id) if len(self.local_seqs): self.local_seq = min(min(self.local_seqs), self.local_target_seq) else: self.local_seq = self.local_target_seq else: self.remote_seqs.remove(seq_id) if len(self.remote_seqs): self.remote_seq = min(min(self.remote_seqs), self.remote_target_seq) else: self.remote_seq = self.remote_target_seq pickle.dump(dict( local=self.local_seq, remote=self.remote_seq ), open(self.data_base + '/sequences', 'wb')) def stat_path(self, path, location, stats=None, with_hash=False): try: if stats: return stats[path] except KeyError: pass if location == 'remote': return self.sdk.stat(path, with_hash) else: return self.system.stat(path, with_hash=True) def ping_remote(self): test = self.sdk.stat('/') return (test != False) def filter_change(self, item, my_stat=None, other_stats=None): location = item['location'] opposite = 'local' if item['location'] == 'remote' else 'remote' res = False if item['type'] == 'create' or item['type'] == 'content': # If it does not exist on remote side, skip test_stat = self.stat_path(item['node']['node_path'], location=opposite, stats=other_stats, with_hash=True) if not test_stat: return False # If it exists but is a directory, it won't change if item['node']['md5'] == 'directory': res = True # If it exists and has same size, ok elif test_stat['size'] == item['node']['bytesize'] and 'hash' in test_stat and test_stat['hash'] == item['node']['md5']: res = True elif item['type'] == 'delete': # Shall we really delete it? loc_stat = self.stat_path(item['source'], location=location, stats=my_stat) if loc_stat: res = True # Shall we delete if already absent? no! test_stat = self.stat_path(item['source'], location=opposite, stats=other_stats) if not test_stat: res = True else:#MOVE source_stat = self.stat_path(item['source'], location=opposite, stats=other_stats) target_stat = self.stat_path(item['target'], location=opposite, stats=other_stats, with_hash=True) if not target_stat or source_stat: return False elif item['node']['md5'] == 'directory': res = True elif target_stat['size'] == item['node']['bytesize'] and 'hash' in target_stat and target_stat['hash'] == item['node']['md5']: res = True if res: if item['type'] != 'delete': logging.info('['+location+'] Filtering out ' + item['type'] + ': ' + item['node']['node_path']) else: logging.info('['+location+'] Filtering out ' + item['type'] + ' ' + item['source']) self.remove_seq(item['seq'], location) return True return False def changes_sorter(self, i1, i2): # no node: delete on top if not i1['node']: return -1 if not i2['node']: return 1 # directory if i1['node']['md5'] == 'directory' and i2['node']['md5'] == 'directory': return cmp(i1['node']['node_path'], i2['node']['node_path']) if i1['node']['md5'] == 'directory': return -1 if i2['node']['md5'] == 'directory': return 1 # sort on path otherwise return cmp(i1['node']['node_path'], i2['node']['node_path']) def info(self, message, toUser=False, channel='sync'): logging.info(message) if toUser and self.pub_socket: self.pub_socket.send_string(channel + "/" + message) def process_localMKDIR(self, path): message = path + ' <============ MKDIR' os.makedirs(self.basepath + path) self.info(message, 'New folder created at '+ path ) def process_remoteMKDIR(self, path): message = 'MKDIR ============> ' + path self.info(message, toUser=False) self.sdk.mkdir(path) def process_localDELETE(self, path): if os.path.isdir(self.basepath + path): self.system.rmdir(path) self.info(path + ' <============ DELETE', 'Deleted folder ' + path) elif os.path.isfile(self.basepath + path): os.unlink(self.basepath + path) self.info(path + ' <============ DELETE', 'Deleted file ' + path) def process_remoteDELETE(self, path): self.sdk.delete(path) self.info('DELETE ============> ' + path, False) def process_localMOVE(self, source, target): if os.path.exists(self.basepath + source): if not os.path.exists(self.basepath + os.path.dirname(target)): os.makedirs(self.basepath + os.path.dirname(target)) os.rename(self.basepath + source, self.basepath + target) self.info(source + ' to ' + target + ' <============ MOVE', 'Moved ' + source + ' to ' + target) return True return False def process_remoteMOVE(self, source, target): self.info('MOVE ============> ' + source + ' to ' + target, toUser=False) self.sdk.rename(source, target) def process_DOWNLOAD(self, path): self.db_handler.update_node_status(path, 'DOWN') self.sdk.download(path, self.basepath + path) self.db_handler.update_node_status(path, 'IDLE') self.info(path + ' <=============== ' + path, 'File ' + path + ' downloaded from server') def process_UPLOAD(self, path): self.db_handler.update_node_status(path, 'UP') self.sdk.upload(self.basepath+path, self.system.stat(path), path) self.db_handler.update_node_status(path, 'IDLE') self.info(path + ' ===============> ' + path, 'File ' + path + ' uploaded to server') def process_change(self, item): location = item['location'] if self.direction == 'up' and location == 'remote': return if self.direction == 'down' and location == 'local': return if item['type'] == 'create' or item['type'] == 'content': if item['node']['md5'] == 'directory': if item['node']['node_path']: logging.info('[' + location + '] Create folder ' + item['node']['node_path']) if location == 'remote': self.process_localMKDIR(item['node']['node_path']) self.db_handler.buffer_real_operation(item['type'], 'NULL', item['node']['node_path']) else: self.process_remoteMKDIR(item['node']['node_path']) else: if item['node']['node_path']: if location == 'remote': self.process_DOWNLOAD(item['node']['node_path']) if item['type'] == 'create': self.db_handler.buffer_real_operation(item['type'], 'NULL', item['node']['node_path']) else: self.db_handler.buffer_real_operation(item['type'], item['node']['node_path'], item['node']['node_path']) else: self.process_UPLOAD(item['node']['node_path']) elif item['type'] == 'delete': logging.info('[' + location + '] Should delete ' + item['source']) if location == 'remote': self.process_localDELETE(item['source']) self.db_handler.buffer_real_operation('delete', item['source'], 'NULL') else: self.process_remoteDELETE(item['source']) else: logging.info('[' + location + '] Should move ' + item['source'] + ' to ' + item['target']) if location == 'remote': if os.path.exists(self.basepath + item['source']): if self.process_localMOVE(item['source'], item['target']): self.db_handler.buffer_real_operation(item['type'], item['source'], item['target']) else: if item["node"]["md5"] == "directory": logging.debug('Cannot find folder to move, switching to creation') self.process_localMKDIR(item['target']) else: logging.debug('Cannot find source, switching to DOWNLOAD') self.process_DOWNLOAD(item['target']) self.db_handler.buffer_real_operation('create', 'NULL', item['target']) else: if self.sdk.stat(item['source']): self.process_remoteMOVE(item['source'], item['target']) elif item['node']['md5'] != 'directory': logging.debug('Cannot find source, switching to UPLOAD') self.process_UPLOAD(item['target']) def reduce_changes(self, local_changes=dict(), remote_changes=dict(), conflicts=[]): rchanges = remote_changes['data'].values() lchanges = local_changes['data'].values() for seq, item in local_changes['data'].items(): pathes = [] if item['source'] != 'NULL': pathes.append(item['source']) if item['target'] != 'NULL': pathes.append(item['target']) # search these pathes in remote_changes remote_sequences = [] for x in pathes: remote_sequences = remote_sequences + remote_changes['path_to_seqs'].setdefault(x, []) for seq_id in remote_sequences: otheritem = remote_changes['data'][seq_id] try: if not (item['type'] == otheritem['type']): continue if not item['node'] and not otheritem['node'] and (item['source'] == otheritem['source']): logging.debug('Reconciliation sequence for change (source)'+item['source']) lchanges.remove(item) rchanges.remove(otheritem) self.remove_seq(item['seq'], 'local') self.remove_seq(otheritem['seq'], 'remote') break if not (os.path.normpath(item['node']['node_path']) == os.path.normpath(otheritem['node']['node_path'])): continue if item['node']['bytesize'] == otheritem['node']['bytesize'] and item['node']['md5'] == otheritem['node']['md5']: logging.debug('Reconciliation sequence for change (node)'+item['node']['node_path']) lchanges.remove(item) rchanges.remove(otheritem) self.remove_seq(item['seq'], 'local') self.remove_seq(otheritem['seq'], 'remote') break except Exception as e: pass test_stats = list(set(map(lambda it: it['source'] if it['source'] != 'NULL' else it['target'], lchanges))) remote_stats = None if len(test_stats): remote_stats = self.sdk.bulk_stat(test_stats, with_hash=True) rchanges = filter(lambda it: not self.filter_change(it, remote_stats, None), rchanges) lchanges = filter(lambda it: not self.filter_change(it, None, remote_stats), lchanges) last_ops = self.db_handler.get_last_operations() new_rchanges = [] for item in lchanges: ignore = False for last in last_ops: if last['type'] == item['type'] and last['source'] == item['source'] and last['target'] == item['target']: logging.info('IGNORING, RECENT MOVE FROM SERVER', last) ignore = True break if ignore: continue conflict = False for rItem in rchanges: if (not item['node'] and not rItem['node'] and rItem['source'] == rItem['source']) or (item['node'] and rItem['node'] and item['node']['node_path'] and rItem['node']['node_path'] and os.path.normpath(item['node']['node_path']) == os.path.normpath(rItem['node']['node_path'])): # Seems there is a conflict - check c_path = item['source'] if item['node']: c_path = item['node']['node_path'] status = self.db_handler.get_node_status(c_path) if status == 'SOLVED:KEEPLOCAL': rchanges.remove(rItem) elif status == 'SOLVED:KEEPREMOTE': conflict = True else: conflict = True rchanges.remove(rItem) conflicts.append({'local':item,'remote':rItem}) break if conflict: continue new_rchanges.append(item) self.db_handler.clear_operations_buffer() # Sort to make sure directory operations are applied first rchanges = sorted(rchanges + new_rchanges, cmp=self.changes_sorter) # Prune changes : for DELETE and MOVE of Dir, remove all childrens toremove = [] for i in range(len(rchanges)): ch = rchanges[i] if ch['type'] == 'path' and not ch['source'] == 'NULL' and not ch['target'] == 'NULL' and ch['node']['md5'] == 'directory': if i < len(rchanges)-1: for j in range(i+1,len(rchanges)): if rchanges[j]['source'] and rchanges[j]['type'] == 'path' and rchanges[j]['source'].startswith(ch['source']+'/'): toremove.append(rchanges[j]) if len(toremove): for r in toremove: if r in rchanges: rchanges.remove(r) return rchanges def store_conflicts(self, conflicts): for conflict in conflicts: local = conflict["local"] remote = conflict["remote"] if local["node"]: path = local["node"]["node_path"] elif local["source"]: path = local["source"] else: path = local["target"] self.db_handler.update_node_status(path, 'CONFLICT', pickle.dumps(remote)) def get_remote_changes(self, seq_id, changes=dict()): logging.debug('Remote sequence ' + str(seq_id)) data = self.sdk.changes(seq_id) for (i, item) in enumerate(data['changes']): item['location'] = 'remote' key = item['source'] if item['source'] != 'NULL' else item['target'] if not key in changes['path_to_seqs']: changes['path_to_seqs'][key] = [] changes['path_to_seqs'][key].append(item['seq']) changes['data'][item['seq']] = item return data['last_seq']
class ContinuousDiffMerger(threading.Thread): """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them""" @pydio_profile def __init__(self, job_config, job_data_path): """ Initialize thread internals :param job_config: JobConfig instance :param job_data_path: Filesystem path where the job data are stored :return: """ threading.Thread.__init__(self) self.last_run = 0 self.configs_path = job_data_path self.job_config = job_config sqlite_files = [ file for file in os.listdir(self.configs_path) if file.endswith(".sqlite") ] for sqlite_file in sqlite_files: try: exists_and_correct = check_sqlite_file( os.path.join(self.configs_path, sqlite_file)) if exists_and_correct: logging.info( "Structure and Integrity of SQLite file %s is intact " % str(os.path.join(self.configs_path, sqlite_file))) except DBCorruptedException as e: logging.debug( "SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync" % (str(os.path.join(self.configs_path, sqlite_file)), e.message)) os.unlink(os.path.join(self.configs_path, sqlite_file)) self.update_sequences_file(0, 0) self.init_global_progress() self.basepath = job_config.directory self.ws_id = job_config.workspace self.sdk = PydioSdk( job_config.server, ws_id=self.ws_id, remote_folder=job_config.remote_folder, user_id=job_config.user_id, device_id=ConfigManager.Instance().get_device_id(), skip_ssl_verify=job_config.trust_ssl, proxies=ConfigManager.Instance().get_defined_proxies(), timeout=job_config.timeout) self.system = SystemSdk(job_config.directory) self.remote_seq = 0 self.local_seq = 0 self.local_target_seq = 0 self.remote_target_seq = 0 self.local_seqs = [] self.remote_seqs = [] self.db_handler = LocalDbHandler(self.configs_path, job_config.directory) self.interrupt = False self.event_timer = 2 self.online_timer = job_config.online_timer self.offline_timer = 60 self.online_status = True self.job_status_running = True self.direction = job_config.direction self.event_logger = EventLogger(self.configs_path) self.processing_signals = {} self.current_tasks = [] self.event_handler = None self.watcher = None self.watcher_first_run = True # TODO: TO BE LOADED FROM CONFIG self.storage_watcher = job_config.label.startswith('LSYNC') self.wait_for_changes = False # True when no changes detected in last cycle, can be used to disable websockets self.marked_for_snapshot_pathes = [] self.processing = False # indicates whether changes are being processed dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START') if job_config.direction != 'down' or ( self.job_config.direction == 'down' and self.job_config.solve != 'remote'): self.event_handler = SqlEventHandler( includes=job_config.filters['includes'], excludes=job_config.filters['excludes'], basepath=job_config.directory, job_data_path=self.configs_path) self.watcher = LocalWatcher(job_config.directory, self.configs_path, event_handler=self.event_handler) self.db_handler.check_lock_on_event_handler(self.event_handler) if os.path.exists(os.path.join(self.configs_path, "sequences")): try: with open(os.path.join(self.configs_path, "sequences"), "rb") as f: sequences = pickle.load(f) self.remote_seq = sequences['remote'] self.local_seq = sequences['local'] if self.event_handler: self.event_handler.last_seq_id = self.local_seq except Exception as e: logging.exception(e) # Wrong content, remove sequences file. os.unlink(os.path.join(self.configs_path, "sequences")) dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk) dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk) if self.job_config.frequency == 'manual': self.job_status_running = False self.logger = EventLogger(self.configs_path) # end init def update_sequences_file(self, local_seq, remote_seq): with open(os.path.join(self.configs_path, "sequences"), "wb") as f: pickle.dump(dict(local=local_seq, remote=remote_seq), f) @pydio_profile def handle_transfer_callback_event(self, sender, change): self.processing_signals[change['target']] = change self.global_progress["queue_bytesize"] -= change['bytes_sent'] # The following 3 lines are a dirty fix, only working for one file at a time size relaining... Better than NaN if self.global_progress["queue_bytesize"] < 0: self.global_progress["queue_bytesize"] = abs( self.global_progress["queue_bytesize"]) self.global_progress["queue_bytesize"] = max( change['total_size'] - change['total_bytes_sent'], self.global_progress["queue_bytesize"]) self.global_progress["queue_done"] += float( change['bytes_sent']) / float(change["total_size"]) @pydio_profile def handle_transfer_rate_event(self, sender, transfer_rate): """ Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value. :param sender:Any :param transfer_rate:float :return: """ if self.global_progress['last_transfer_rate'] > 0: self.global_progress['last_transfer_rate'] = ( float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0 else: self.global_progress['last_transfer_rate'] = float(transfer_rate) @pydio_profile def is_running(self): """ Whether the job is in Running state or not. :return:bool """ return self.job_status_running def init_global_progress(self): """ Initialize the internal progress data :return:None """ self.global_progress = { 'status_indexing': 0, 'queue_length': 0, 'queue_done': 0.0, 'queue_bytesize': 0, 'last_transfer_rate': -1, 'queue_start_time': time.clock(), 'total_time': 0 } @pydio_profile def update_global_progress(self, compute_queue_size=True): """ Compute a dict representation with many indications about the current state of the queue :return: dict """ self.global_progress['total_time'] = time.clock( ) - self.global_progress['queue_start_time'] if compute_queue_size: self.global_progress[ "queue_bytesize"] = self.compute_queue_bytesize() # compute an eta eta = -1 if self.global_progress[ 'last_transfer_rate'] > -1 and self.global_progress[ 'queue_bytesize'] > 0: eta = self.global_progress[ 'queue_bytesize'] / self.global_progress['last_transfer_rate'] elif self.global_progress['queue_done']: remaining_operations = self.global_progress[ 'queue_length'] - self.global_progress['queue_done'] eta = remaining_operations * self.global_progress[ 'total_time'] / self.global_progress['queue_done'] self.global_progress['eta'] = eta # logging.info(self.global_progress) return self.global_progress def get_global_progress(self): self.update_global_progress(compute_queue_size=False) return self.global_progress @pydio_profile def update_current_tasks(self, cursor=0, limit=5): """ Get a list of the current tasks :return: list() """ if not hasattr(self, 'current_store'): return [] self.current_tasks = self.current_store.list_changes(cursor, limit) for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] @pydio_profile def get_current_tasks(self): for change in self.current_tasks: if change['target'] in self.processing_signals: progress = self.processing_signals[change['target']] for key in progress.keys(): change[key] = progress[key] return { 'total': self.global_progress['queue_length'], 'current': self.current_tasks } def get_websocket_status(self): try: return self.sdk.waiter and self.sdk.waiter.ws.connected except AttributeError: pass @pydio_profile def compute_queue_bytesize(self): """ Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue. :return:float """ if not hasattr(self, 'current_store'): return 0 total = 0 exclude_pathes = [] for task in self.processing_signals: if 'remaining_bytes' in task: total += float(task['remaining_bytes']) exclude_pathes.append('"' + task['target'] + '"') if len(exclude_pathes): where = "target IN (" + ','.join(exclude_pathes) + ")" return self.current_store.sum_sizes(where) else: return self.current_store.sum_sizes() @pydio_profile def start_now(self): """ Resume task (set it in running mode) and make sure the cycle starts now :return: """ self.last_run = 0 self.sdk.remove_interrupt() self.resume() def pause(self): """ Set the task in pause. The thread is still running, but the cycle does nothing. :return:None """ self.job_status_running = False self.sdk.set_interrupt() self.info(_('Job Paused'), toUser='******', channel='status') @pydio_profile def resume(self): """ Set the task out of pause mode. :return: """ self.job_status_running = True self.sdk.proxies = ConfigManager.Instance().get_defined_proxies() self.sdk.remove_interrupt() self.info(_('Job Started'), toUser='******', channel='status') def stop(self): """ Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop. :return: """ if self.watcher: logging.debug("Stopping watcher: %s" % self.watcher) self.watcher.stop() self.info(_('Job stopping'), toUser='******', channel='status') self.sdk.set_interrupt() self.interrupt = True def sleep_offline(self): """ Sleep the thread for a "long" time (offline time) :return: """ self.online_status = False self.last_run = time.time() time.sleep(self.event_timer) def sleep_online(self): """ Sleep the thread for a "short" time (online time) :return: """ self.online_status = True self.last_run = time.time() time.sleep(self.event_timer) def exit_loop_clean(self, logger): self.current_store.close() self.processing = False self.init_global_progress() logger.log_state(_('Synchronized'), 'success') if self.job_config.frequency == 'manual': self.job_status_running = False self.sleep_offline() else: self.sleep_online() def exit_loop_error(self, message): self.current_store.close() self.init_global_progress() self.logger.log_state(message, 'error') if self.job_config.frequency == 'manual': self.job_status_running = False self.sleep_offline() else: self.sleep_online() @pydio_profile def run(self): """ Start the thread """ very_first = False self.start_watcher() while not self.interrupt: try: # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq)) self.processing_signals = {} self.init_global_progress() if very_first: self.global_progress['status_indexing'] = 1 interval = int(time.time() - self.last_run) if (self.online_status and interval < self.online_timer) or ( not self.online_status and interval < self.offline_timer): time.sleep(self.event_timer) continue if not self.job_status_running: logging.debug("self.online_timer: %s" % self.online_timer) self.logger.log_state(_('Status: Paused'), "sync") self.sleep_offline() continue if self.job_config.frequency == 'time': start_time = datetime.time( int(self.job_config.start_time['h']), int(self.job_config.start_time['m'])) end_time = datetime.time( int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59) now = datetime.datetime.now().time() if not start_time < now < end_time: self.logger.log_state( _('Status: scheduled for %s') % str(start_time), "sync") self.sleep_offline() continue else: logging.info( "Now triggering synchro as expected at time " + str(start_time)) if not self.system.check_basepath(): log = _( 'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' ) % self.offline_timer logging.error(log) self.logger.log_state( _('Cannot find local folder, did you disconnect a volume?' ), "error") self.sleep_offline() continue # Before starting infinite loop, small check that remote folder still exists if not self.sdk.check_basepath(): # if it doesn't try to create it self.sdk.remote_folder = os.path.join( "/", self.sdk.remote_folder) try: logging.info("Creating remote directory.") self.sdk.mkdir("") continue except Exception as e: logging.exception(e) self.sleep_offline() if not self.sdk.check_basepath(): log = _( 'Cannot find remote folder, maybe it was renamed? Sync cannot start, please check the configuration.' ) logging.error(log) self.logger.log_state(log, 'error') self.sleep_offline() continue if self.watcher: for snap_path in self.marked_for_snapshot_pathes: logging.info( 'LOCAL SNAPSHOT : loading snapshot for directory %s' % snap_path) if self.interrupt or not self.job_status_running: raise InterruptException() self.watcher.check_from_snapshot(snap_path, use_transaction=False) self.marked_for_snapshot_pathes = [] writewait = .5 # To avoid reading events before they're written (db lock) wait for writing to finish totalwait = 0 while self.event_handler and self.event_handler.locked and totalwait < 60 * 3: logging.info( "Waiting for changes to be written before retrieving remote changes." ) if writewait < 5: writewait += .5 time.sleep(writewait) totalwait += writewait # Load local and/or remote changes, depending on the direction self.current_store = SqliteChangeStore( self.configs_path + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'], self.job_config.poolsize, local_sdk=self.system, remote_sdk=self.sdk, job_config=self.job_config, db_handler=self.db_handler) self.current_store.open() try: if self.job_config.direction != 'up': logging.info( 'Loading remote changes with sequence {0:s} for job id {1:s}' .format(str(self.remote_seq), str(self.job_config.id))) if self.remote_seq == 0: self.logger.log_state( _('Gathering data from remote workspace, this can take a while...' ), 'sync') very_first = True self.remote_target_seq = self.load_remote_changes_in_store( self.remote_seq, self.current_store) self.current_store.sync() else: self.remote_target_seq = 1 self.ping_remote() except RequestException as ce: logging.exception(ce) if not connection_helper.is_connected_to_internet( self.sdk.proxies): error = _( 'No Internet connection detected! Waiting for %s seconds to retry' ) % self.offline_timer else: error = _( 'Connection to server failed, server is probably down. Waiting %s seconds to retry' ) % self.offline_timer self.marked_for_snapshot_pathes = [] logging.error(error) self.logger.log_state(error, "wait") self.current_store.close() self.sleep_offline() continue except PydioSdkBasicAuthException as e: self.logger.log_state(_('Authentication Error'), 'error') self.logger.log_notif(_('Authentication Error'), 'error') self.current_store.close() self.sleep_offline() continue except Exception as e: error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % ( e.message, self.offline_timer) logging.exception(e) self.logger.log_state( _('Error while connecting to remote server (%s)') % e.message, "error") self.marked_for_snapshot_pathes = [] self.current_store.close() self.sleep_offline() continue self.online_status = True if not self.job_config.server_configs: self.job_config.server_configs = self.sdk.load_server_configs( ) self.sdk.set_server_configs(self.job_config.server_configs) if self.job_config.direction != 'down' or ( self.job_config.direction == 'down' and self.job_config.solve != 'remote'): logging.info( 'Loading local changes with sequence {0:s} for job id {1:s}' .format(str(self.local_seq), str(self.job_config.id))) self.local_target_seq = self.db_handler.get_local_changes_as_stream( self.local_seq, self.current_store.flatten_and_store) self.current_store.sync() else: self.local_target_seq = 1 if not connection_helper.internet_ok: connection_helper.is_connected_to_internet( self.sdk.proxies) changes_length = len(self.current_store) if not changes_length: self.processing = False logging.info('No changes detected in ' + self.job_config.id) self.update_min_seqs_from_store() self.wait_for_changes = True self.exit_loop_clean(self.logger) very_first = False # logging.info("CheckSync of " + self.job_config.id) # self.db_handler.list_non_idle_nodes() if not self.watcher.isAlive() and not self.interrupt: logging.info("File watcher died, restarting...") self.watcher.stop() self.watcher = LocalWatcher( self.job_config.directory, self.configs_path, event_handler=self.event_handler) self.start_watcher() continue self.global_progress['status_indexing'] = 1 logging.info('Reducing changes for ' + self.job_config.id) self.logger.log_state( _('Merging changes between remote and local, please wait...' ), 'sync') logging.debug('[CMERGER] DETECTING MOVES ' + self.job_config.id) self.current_store.detect_win_moves() self.update_min_seqs_from_store() logging.debug('[CMERGER] REMOVING ECHO ' + self.job_config.id) self.current_store.clean_echoes() self.update_min_seqs_from_store() # We are updating the status to IDLE here for the nodes which has status as NEW # The reason is when we create a new sync on the existing folder, some of the files might # already be synchronized and we ignore those files while we Dedup changes and those files # remain untouched later. # So the flow of node status change will occur as follows # NEW (as soon as we create a new sync task) # | # IDLE (here, just before we reduce changes) # | # PENDING (those files/folders which remain after reducing changes and to be actually processed) # | # UP / DOWN / CONFLICT (corresponding the operation which occurs) # | # IDLE (The final state once upload/ download happens or once when the conflict is resolved) self.db_handler.update_bulk_node_status_as_idle() logging.debug('[CMERGER] Delete Copies ' + self.job_config.id) self.current_store.delete_copies() self.update_min_seqs_from_store() logging.debug('[CMERGER] Dedup changes ' + self.job_config.id) self.current_store.dedup_changes() self.update_min_seqs_from_store() if not self.storage_watcher or very_first: logging.debug('[CMERGER] Detect unnecessary changes ' + self.ws_id) self.logger.log_state( _('Detecting unnecessary changes...'), 'sync') self.current_store.detect_unnecessary_changes() logging.debug( '[CMERGER] Done detecting unnecessary changes') self.logger.log_state( _('Done detecting unnecessary changes...'), 'sync') self.update_min_seqs_from_store() logging.debug('Clearing op and pruning folders moves ' + self.job_config.id) self.current_store.clear_operations_buffer() self.current_store.prune_folders_moves() self.update_min_seqs_from_store() logging.debug('Store conflicts ' + self.job_config.id) store_conflicts = self.current_store.clean_and_detect_conflicts( self.db_handler) if store_conflicts: if self.job_config.solve == 'both': logging.info('Marking nodes SOLVED:KEEPBOTH') for row in self.db_handler.list_conflict_nodes(): self.db_handler.update_node_status( row['node_path'], 'SOLVED:KEEPBOTH') store_conflicts = self.current_store.clean_and_detect_conflicts( self.db_handler) if self.job_config.solve == 'local': logging.info('Marking nodes SOLVED:KEEPLOCAL') for row in self.db_handler.list_conflict_nodes(): self.db_handler.update_node_status( row['node_path'], 'SOLVED:KEEPLOCAL') store_conflicts = self.current_store.clean_and_detect_conflicts( self.db_handler) if self.job_config.solve == 'remote': logging.info('Marking nodes SOLVED:KEEPREMOTE') for row in self.db_handler.list_conflict_nodes(): self.db_handler.update_node_status( row['node_path'], 'SOLVED:KEEPREMOTE') store_conflicts = self.current_store.clean_and_detect_conflicts( self.db_handler) if store_conflicts: logging.info('Conflicts detected, cannot continue!') self.logger.log_state( _('Conflicts detected, cannot continue!'), 'error') self.current_store.close() self.sleep_offline() self.logger.log_notif( _('Conflicts detected, cannot continue!'), 'error') continue if self.job_config.direction == 'down' and self.job_config.solve != 'remote': self.current_store.remove_based_on_location('local') self.update_min_seqs_from_store() changes_length = len(self.current_store) if not changes_length: logging.info('No changes detected for ' + self.job_config.id) self.exit_loop_clean(self.logger) very_first = False continue self.current_store.update_pending_status( self.db_handler, self.local_seq) self.global_progress['status_indexing'] = 0 import change_processor self.global_progress['queue_length'] = changes_length logging.info('Processing %i changes' % changes_length) self.logger.log_state( _('Processing %i changes') % changes_length, "start") counter = [1] def processor_callback(change): try: if self.interrupt or not self.job_status_running: raise InterruptException() self.update_current_tasks() self.update_global_progress() Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk, self.db_handler, self.event_logger) proc.process_change() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep(0.01) if self.interrupt or not self.job_status_running: raise InterruptException() except ProcessException as pe: logging.error(pe.message) return False except InterruptException as i: raise i except PydioSdkDefaultException as p: raise p except Exception as ex: logging.exception(ex) return False return True try: if sys.platform.startswith('win'): self.marked_for_snapshot_pathes = list( set(self.current_store.find_modified_parents()) - set(self.marked_for_snapshot_pathes)) if not self.processing: self.processing = True self.current_store.process_changes_with_callback( processor_callback) # logging.info("Updating seqs") self.current_store.process_pending_changes() self.update_min_seqs_from_store(success=True) self.global_progress['queue_done'] = float(counter[0]) counter[0] += 1 self.update_current_tasks() self.update_global_progress() time.sleep( 0.05) # Allow for changes to be noticeable in UI # logging.info("DONE WITH CHANGES") self.processing = False except InterruptException as iexc: pass self.logger.log_state( _('%i files modified') % self.global_progress['queue_done'], 'success') if self.global_progress['queue_done']: self.logger.log_notif( _('%i files modified') % self.global_progress['queue_done'], 'success') very_first = False self.exit_loop_clean(self.logger) continue except PydioSdkDefaultException as re: logging.error(re.message) self.logger.log_state(re.message, 'error') self.sleep_online() except SSLError as rt: logging.error(rt.message) self.logger.log_state( _('An SSL error happened, please check the logs'), 'error') self.sleep_offline() except ProxyError as rt: logging.error(rt.message) self.logger.log_state( _('A proxy error happened, please check the logs'), 'error') self.sleep_offline() except TooManyRedirects as rt: logging.error(rt.message) self.logger.log_state( _('Connection error: too many redirects'), 'error') self.sleep_offline() except ChunkedEncodingError as rt: logging.error(rt.message) self.logger.log_state( _('Chunked encoding error, please check the logs'), 'error') self.sleep_offline() except ContentDecodingError as rt: logging.error(rt.message) self.logger.log_state( _('Content Decoding error, please check the logs'), 'error') self.sleep_online() except InvalidSchema as rt: logging.error(rt.message) self.logger.log_state( _('Http connection error: invalid schema.'), 'error') self.sleep_offline() except InvalidURL as rt: logging.error(rt.message) self.logger.log_state(_('Http connection error: invalid URL.'), 'error') self.sleep_offline() except Timeout as to: logging.error(to) self.logger.log_state( _('Connection timeout, will retry later.'), 'error') self.sleep_offline() except RequestException as ree: logging.error(ree.message) self.logger.log_state(_('Cannot resolve domain!'), 'error') self.sleep_offline() except PydioSdkBasicAuthException as e: self.logger.log_state(_('Authentication Error'), 'error') self.logger.log_notif(_('Authentication Error'), 'error') self.sleep_offline() except Exception as e: if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')): logging.exception('Unexpected Error: %s' % e.message) self.logger.log_state( _('Unexpected Error: %s') % e.message, 'error') else: logging.exception(e) self.sleep_offline() self.current_store.close() self.processing = False logging.debug('Finished this cycle, waiting for %i seconds' % self.online_timer) very_first = False def start_watcher(self): if self.watcher: if self.watcher_first_run: def status_callback(status): self.logger.log_state(status, 'sync') self.init_global_progress() try: self.global_progress['status_indexing'] = 1 self.logger.log_state( _('Checking changes since last launch...'), "sync") very_first = True self.db_handler.update_bulk_node_status_as_idle() self.watcher.check_from_snapshot( state_callback=status_callback) except DBCorruptedException as e: self.stop() JobsLoader.Instance().clear_job_data(self.job_config.id) logging.error(e) return except Exception as e: logging.exception(e) self.interrupt = True self.logger.log_state( _('Oops, error while indexing the local folder. Pausing the task.' ), 'error') logging.error(e) self.watcher_first_run = False self.watcher.start() @pydio_profile def update_min_seqs_from_store(self, success=False): self.local_seq = self.current_store.get_min_seq('local', success=success) if self.local_seq == -1: self.local_seq = self.local_target_seq self.remote_seq = self.current_store.get_min_seq('remote', success=success) if self.remote_seq == -1: self.remote_seq = self.remote_target_seq # logging.info('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq) self.update_sequences_file(self.local_seq, self.remote_seq) if self.event_handler: self.event_handler.last_seq_id = self.local_seq @pydio_profile def ping_remote(self): """ Simple stat of the remote server root, to know if it's reachable. :return:bool """ test = self.sdk.stat('/') return (test != False) def info(self, message, toUser=False, channel='sync'): logging.info(message) if toUser: dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message) @pydio_profile def load_remote_changes_in_store(self, seq_id, store): last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store) # if self.wait_for_changes: # timereq = time.time() # try: # self.sdk.websocket_connect(last_seq, str(self.job_config.id)) # # self.sdk.waiter.should_fetch_changes = False # while not self.sdk.waiter.should_fetch_changes and not self.interrupt: # time.sleep(2) # # these break only after one run # if self.local_seq != self.db_handler.get_max_seq(): # # There was a local change # break # if not self.sdk.waiter.ws.connected: # # websocket disconnected # break # except Exception as e: # logging.exception(e) # # if time.time() - timereq > 10: # if last_seq was updated more than 10s ago, update it # last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store) return last_seq