Ejemplo n.º 1
0
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""

    @pydio_profile
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        sqlite_files = [file for file in os.listdir(self.configs_path) if file.endswith(".sqlite")]

        for sqlite_file in sqlite_files:
            try:
                exists_and_correct = check_sqlite_file(os.path.join(self.configs_path, sqlite_file))
                if exists_and_correct:
                    logging.info("Structure and Integrity of SQLite file %s is intact " % str(
                        os.path.join(self.configs_path, sqlite_file)))
            except DBCorruptedException as e:
                logging.debug("SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync"
                              % (str(os.path.join(self.configs_path, sqlite_file)), e.message))
                os.unlink(os.path.join(self.configs_path, sqlite_file))
                self.update_sequences_file(0, 0)

        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies(),
            timeout=job_config.timeout
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = job_config.online_timer
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []
        self.processing = False  # indicates whether changes are being processed

        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down' or (self.job_config.direction == 'down' and self.job_config.solve != 'remote'):
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(os.path.join(self.configs_path, "sequences")):
            try:
                with open(os.path.join(self.configs_path, "sequences"), "rb") as f:
                    sequences = pickle.load(f)
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception as e:
                logging.exception(e)
                # Wrong content, remove sequences file.
                os.unlink(os.path.join(self.configs_path, "sequences"))

        dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False
        self.logger = EventLogger(self.configs_path)
    # end init

    def update_sequences_file(self, local_seq, remote_seq):
        with open(os.path.join(self.configs_path, "sequences"), "wb") as f:
            pickle.dump(dict(
                local=local_seq,
                remote=remote_seq
            ), f)

    @pydio_profile
    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"])

    @pydio_profile
    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    @pydio_profile
    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'status_indexing'   :0,
            'queue_length'      :0,
            'queue_done'        :0.0,
            'queue_bytesize'    :0,
            'last_transfer_rate':-1,
            'queue_start_time'  :time.clock(),
            'total_time'        :0
        }

    @pydio_profile
    def update_global_progress(self, compute_queue_size=True):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time']
        if compute_queue_size:
            self.global_progress["queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0:
            eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        self.update_global_progress(compute_queue_size=False)
        return self.global_progress

    @pydio_profile
    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    @pydio_profile
    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    @pydio_profile
    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    @pydio_profile
    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.sdk.remove_interrupt()
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.sdk.set_interrupt()
        self.info(_('Job Paused'), toUser='******', channel='status')

    @pydio_profile
    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.sdk.proxies = ConfigManager.Instance().get_defined_proxies()
        self.sdk.remove_interrupt()
        self.info(_('Job Started'), toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if self.watcher:
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info(_('Job stopping'), toUser='******', channel='status')
        self.sdk.set_interrupt()
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def exit_loop_clean(self, logger):
        #self.marked_for_snapshot_pathes = []
        self.current_store.close()
        self.init_global_progress()
        logger.log_state(_('Synchronized'), 'success')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()


    @pydio_profile
    def run(self):
        """
        Start the thread
        """
        very_first = False
        self.start_watcher()

        while not self.interrupt:
            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                if very_first:
                    self.global_progress['status_indexing'] = 1

                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    self.logger.log_state(_('Status: Paused'), "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']))
                    end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        self.logger.log_state(_('Status: scheduled for %s') % str(start_time), "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info("Now triggering synchro as expected at time " + str(start_time))

                if not self.system.check_basepath():
                    log = _('Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry') % self.offline_timer
                    logging.error(log)
                    self.logger.log_state(_('Cannot find local folder, did you disconnect a volume?'), "error")
                    self.sleep_offline()
                    continue

                # Before starting infinite loop, small check that remote folder still exists
                if not self.sdk.check_basepath():
                    # if it doesn't try to create it
                    self.sdk.remote_folder = os.path.join("/", self.sdk.remote_folder)
                    try:
                        logging.info("Creating remote directory.")
                        self.sdk.mkdir("")
                        continue
                    except Exception as e:
                        logging.exception(e)
                    if not self.sdk.check_basepath():
                        log = _('Cannot find remote folder, maybe it was renamed? Sync cannot start, please check the configuration.')
                        logging.error(log)
                        self.logger.log_state(log, 'error')
                        self.sleep_offline()
                        continue

                if self.watcher:
                    for snap_path in self.marked_for_snapshot_pathes:
                        logging.info('LOCAL SNAPSHOT : loading snapshot for directory %s' % snap_path)
                        if self.interrupt or not self.job_status_running:
                                                        raise InterruptException()
                        self.watcher.check_from_snapshot(snap_path)
                    self.marked_for_snapshot_pathes = []

                writewait = .5  # To avoid reading events before they're written (db lock) wait for writing to finish
                while self.event_handler.locked:
                    logging.info("Waiting for changes to be written before retrieving remote changes.")
                    if writewait < 5:
                        writewait += .5
                    time.sleep(writewait)
                # Load local and/or remote changes, depending on the direction
                self.current_store = SqliteChangeStore(self.configs_path + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'], self.job_config.poolsize, local_sdk=self.system, remote_sdk=self.sdk, job_config=self.job_config)
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info(
                            'Loading remote changes with sequence {0:s} for job id {1:s}'.format(str(self.remote_seq),
                                                                                                   str(self.job_config.id)))
                        if self.remote_seq == 0:
                            self.logger.log_state(_('Gathering data from remote workspace, this can take a while...'), 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except RequestException as ce:
                    logging.exception(ce)
                    if not connection_helper.is_connected_to_internet(self.sdk.proxies):
                        error = _('No Internet connection detected! Waiting for %s seconds to retry') % self.offline_timer
                    else:
                        error = _('Connection to server failed, server is probably down. Waiting %s seconds to retry') % self.offline_timer
                    self.marked_for_snapshot_pathes = []
                    logging.error(error)
                    self.logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer)
                    logging.exception(e)
                    self.logger.log_state(_('Error while connecting to remote server (%s)') % e.message, "error")
                    self.marked_for_snapshot_pathes = []
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs()
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down' or (self.job_config.direction == 'down' and self.job_config.solve != 'remote'):
                    logging.info(
                        'Loading local changes with sequence {0:s} for job id {1:s}'.format(str(self.local_seq),
                                                                                              str(self.job_config.id)))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1
                if not connection_helper.internet_ok:
                    connection_helper.is_connected_to_internet(self.sdk.proxies)

                changes_length = len(self.current_store)
                if not changes_length:
                    self.processing = False
                    logging.info('No changes detected in ' + self.job_config.id)
                    self.update_min_seqs_from_store()
                    self.exit_loop_clean(self.logger)
                    very_first = False
                    #logging.info("CheckSync of " + self.job_config.id)
                    #self.db_handler.list_non_idle_nodes()
                    if not self.watcher.isAlive() and not self.interrupt:
                        logging.info("File watcher died, restarting...")
                        self.watcher.stop()
                        self.watcher = LocalWatcher(self.job_config.directory,
                                                    self.configs_path,
                                                    event_handler=self.event_handler)
                        self.start_watcher()
                    continue

                self.global_progress['status_indexing'] = 1
                logging.info('Reducing changes for ' + self.job_config.id)
                self.logger.log_state(_('Merging changes between remote and local, please wait...'), 'sync')

                # We are updating the status to IDLE here for the nodes which has status as NEW
                # The reason is when we create a new sync on the existing folder, some of the files might
                # already be synchronized and we ignore those files while we Dedup changes and those files
                # remain untouched later.
                # So the flow of node status change will occur as follows
                # NEW (as soon as we create a new sync task)
                #  |
                # IDLE (here, just before we reduce changes)
                #  |
                # PENDING (those files/folders which remain after reducing changes and to be actually processed)
                #  |
                # UP / DOWN / CONFLICT (corresponding the operation which occurs)
                #  |
                # IDLE (The final state once upload/ download happens or once when the conflict is resolved)
                self.db_handler.update_bulk_node_status_as_idle()

                logging.debug('[CMERGER] Delete Copies ' + self.job_config.id)
                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                logging.debug('[CMERGER] Dedup changes ' + self.job_config.id)
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                if not self.storage_watcher or very_first:
                    logging.debug('[CMERGER] Detect unnecessary changes ' + self.ws_id)
                    self.logger.log_state(_('Detecting unecessary changes...'), 'sync')
                    self.current_store.detect_unnecessary_changes()
                    logging.debug('[CMERGER] Done detecting unnecessary changes')
                    self.logger.log_state(_('Done detecting unecessary changes...'), 'sync')
                self.update_min_seqs_from_store()
                logging.debug('Clearing op and pruning folders moves ' + self.job_config.id)
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                logging.debug('Store conflicts ' + self.job_config.id)
                store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler)
                if store_conflicts:
                    if self.job_config.solve == 'both':
                        logging.info('Marking nodes SOLVED:KEEPBOTH')
                        for row in self.db_handler.list_conflict_nodes():
                            self.db_handler.update_node_status(row['node_path'], 'SOLVED:KEEPBOTH')
                        store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    self.logger.log_state(_('Conflicts detected, cannot continue!'), 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    self.logger.log_notif(_('Conflicts detected, cannot continue!'), 'error')
                    continue

                if self.job_config.direction == 'down' and self.job_config.solve != 'remote':
                    self.current_store.remove_based_on_location('local')
                    self.update_min_seqs_from_store()

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected for ' + self.job_config.id)
                    self.exit_loop_clean(self.logger)
                    very_first = False
                    continue

                self.current_store.update_pending_status(self.db_handler, self.local_seq)

                self.global_progress['status_indexing'] = 0
                import change_processor
                self.global_progress['queue_length'] = changes_length
                logging.info('Processing %i changes' % changes_length)
                self.logger.log_state(_('Processing %i changes') % changes_length, "start")
                counter = [1]
                def processor_callback(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.update_current_tasks()
                        self.update_global_progress()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk,
                                               self.db_handler, self.event_logger)
                        proc.process_change()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(0.1)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()

                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except InterruptException as i:
                        raise i
                    except PydioSdkDefaultException as p:
                        raise p
                    except Exception as ex:
                        logging.exception(ex)
                        return False
                    return True
                def processor_callback2(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk,
                                               self.db_handler, self.event_logger)
                        proc.process_change()
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                    except PydioSdkException as pe:
                        if pe.message.find("Original file") > -1:
                            pe.code = 1404
                            raise pe
                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except PydioSdkDefaultException as p:
                        raise p
                    except InterruptException as i:
                        raise i
                    return True

                try:
                    if sys.platform.startswith('win'):
                        self.marked_for_snapshot_pathes = list(set(self.current_store.find_modified_parents()) - set(self.marked_for_snapshot_pathes))
                    if not self.processing:
                        self.processing = True
                        for i in self.current_store.process_changes_with_callback(processor_callback, processor_callback2):
                            if self.interrupt:
                                raise InterruptException
                            #logging.info("Updating seqs")
                            self.current_store.process_pending_changes()
                            self.update_min_seqs_from_store(success=True)
                            self.global_progress['queue_done'] = float(counter[0])
                            counter[0] += 1
                            self.update_current_tasks()
                            self.update_global_progress()
                            time.sleep(0.05)  # Allow for changes to be noticeable in UI
                        time.sleep(.5)
                        self.current_store.process_pending_changes()
                        self.update_min_seqs_from_store(success=True)
                        self.update_current_tasks()
                        self.update_global_progress()
                        #logging.info("DONE WITH CHANGES")
                        self.processing = False

                except InterruptException as iexc:
                    pass
                self.logger.log_state(_('%i files modified') % self.global_progress['queue_done'], 'success')
                if self.global_progress['queue_done']:
                    self.logger.log_notif(_('%i files modified') % self.global_progress['queue_done'], 'success')

                self.exit_loop_clean(self.logger)

            except PydioSdkDefaultException as re:
                logging.error(re.message)
                self.logger.log_state(re.message, 'error')
            except SSLError as rt:
                logging.error(rt.message)
                self.logger.log_state(_('An SSL error happened, please check the logs'), 'error')
            except ProxyError as rt:
                logging.error(rt.message)
                self.logger.log_state(_('A proxy error happened, please check the logs'), 'error')
            except TooManyRedirects as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Connection error: too many redirects'), 'error')
            except ChunkedEncodingError as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Chunked encoding error, please check the logs'), 'error')
            except ContentDecodingError as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Content Decoding error, please check the logs'), 'error')
            except InvalidSchema as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Http connection error: invalid schema.'), 'error')
            except InvalidURL as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Http connection error: invalid URL.'), 'error')
            except Timeout as to:
                logging.error(to)
                self.logger.log_state(_('Connection timeout, will retry later.'), 'error')
            except RequestException as ree:
                logging.error(ree.message)
                self.logger.log_state(_('Cannot resolve domain!'), 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    self.logger.log_state(_('Unexpected Error: %s') % e.message, 'error')
                else:
                    logging.exception(e)
                self.sleep_offline()
            logging.debug('Finished this cycle, waiting for %i seconds' % self.online_timer)
            very_first = False

    def start_watcher(self):
        if self.watcher:
            if self.watcher_first_run:
                def status_callback(status):
                    self.logger.log_state(status, 'sync')
                self.init_global_progress()

                try:
                    self.global_progress['status_indexing'] = 1
                    self.logger.log_state(_('Checking changes since last launch...'), "sync")
                    very_first = True
                    self.db_handler.update_bulk_node_status_as_idle()
                    self.watcher.check_from_snapshot(state_callback=status_callback)
                except DBCorruptedException as e:
                    self.stop()
                    JobsLoader.Instance().clear_job_data(self.job_config.id)
                    logging.error(e)
                    return
                except Exception as e:
                    logging.exception(e)
                    self.interrupt = True
                    self.logger.log_state(_('Oops, error while indexing the local folder. Pausing the task.'), 'error')
                    logging.error(e)

                self.watcher_first_run = False
            self.watcher.start()

    @pydio_profile
    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local', success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote', success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        #logging.info('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq)
        self.update_sequences_file(self.local_seq, self.remote_seq)
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    @pydio_profile
    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message)

    @pydio_profile
    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        """try:
            self.sdk.websocket_send()
        except Exception as e:
            logging.exception(e)"""
        return last_seq
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""

    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.data_base = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.data_base, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.data_base)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=job_data_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        job_data_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.data_base + "/sequences"):
            try:
                sequences = pickle.load(open(self.data_base + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.data_base + "/sequences")


        dispatcher.connect( self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=dispatcher.Any )
        dispatcher.connect( self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=dispatcher.Any )

        if self.job_config.frequency == 'manual':
            self.job_status_running = False


    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"])

    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'queue_length'      :0,
            'queue_done'        :0.0,
            'queue_bytesize'    :0,
            'last_transfer_rate':-1,
            'queue_start_time'  :time.clock(),
            'total_time'        :0
        }

    def update_global_progress(self):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time']
        self.global_progress["queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0 :
            eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        return self.global_progress

    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        where = ''
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.info('Job Paused', toUser='******', channel='status')

    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.info('Job Started', toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if hasattr(self, 'watcher'):
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info('Job stopping', toUser='******', channel='status')
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def run(self):
        """
        Start the thread
        """
        logger = EventLogger(self.data_base)
        very_first = False

        if self.watcher:
            if self.watcher_first_run:
                logger.log_state('Checking changes since last launch...', "sync")
                very_first = True
                self.watcher.check_from_snapshot()
                self.watcher_first_run = False
            self.watcher.start()

        while not self.interrupt:

            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    logger.log_state('Status: Paused', "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']))
                    end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        logger.log_state('Status: scheduled for %s' % str(start_time), "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info("Now triggering synchro as expected at time " + start_time)

                if not self.system.check_basepath():
                    log = 'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' % self.offline_timer
                    logging.error(log)
                    logger.log_state('Cannot find local folder, did you disconnect a volume?', "error")
                    self.sleep_offline()
                    continue

                # Load local and/or remote changes, depending on the direction
                from pydio.job.change_stores import SqliteChangeStore
                self.current_store = SqliteChangeStore(self.data_base + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'])
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info('Loading remote changes with sequence ' + str(self.remote_seq))
                        if self.remote_seq == 0:
                            logger.log_state('Gathering data from remote workspace, this can take a while...', 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except ConnectionError as ce:
                    error = 'No connection detected, waiting %s seconds to retry' % self.offline_timer
                    logging.error(error)
                    logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer)
                    logging.error(error)
                    logger.log_state('Error while connecting to remote server (%s)' % e.message, "error")
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs()
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' + str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1

                logging.info('Reducing changes')

                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                self.current_store.detect_unnecessary_changes(local_sdk=self.system, remote_sdk=self.sdk)
                self.update_min_seqs_from_store()
                #self.current_store.filter_out_echoes_events()
                #self.update_min_seqs_from_store()
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    logger.log_state('Conflicts detected, cannot continue!', 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue

                changes_length = len(self.current_store)
                if changes_length:
                    import change_processor
                    self.global_progress['queue_length'] = changes_length
                    logging.info('Processing %i changes' % changes_length)
                    logger.log_state('Processing %i changes' % changes_length, "start")
                    counter = [1]
                    def processor_callback(change):
                        try:
                            if self.interrupt or not self.job_status_running:
                                raise InterruptException()
                            self.update_current_tasks()
                            self.update_global_progress()
                            proc = ChangeProcessor(change, self.current_store, self.job_config, self.system, self.sdk,
                                                   self.db_handler, self.event_logger)
                            proc.process_change()
                            self.update_min_seqs_from_store(success=True)
                            self.global_progress['queue_done'] = float(counter[0])
                            counter[0] += 1
                            self.update_current_tasks()
                            self.update_global_progress()
                            time.sleep(0.1)
                            if self.interrupt or not self.job_status_running:
                                raise InterruptException()

                        except ProcessException as pe:
                            logging.error(pe.message)
                            return False
                        except InterruptException as i:
                            raise i
                        except PydioSdkDefaultException as p:
                            raise p
                        except Exception as ex:
                            logging.exception(ex.message)
                            return False
                        return True

                    try:
                        self.current_store.process_changes_with_callback(processor_callback)
                    except InterruptException as iexc:
                        pass
                    logger.log_state('%i files modified' % self.global_progress['queue_done'], "success")
                else:
                    logging.info('No changes detected')
                    if very_first:
                        logger.log_state('Remote and local are synchronized', 'success')

            except PydioSdkDefaultException as re:
                logging.warning(re.message)
                logger.log_state(re.message, 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    logger.log_state('Unexpected Error: %s' % e.message, 'error')


            logging.info('Finished this cycle, waiting for %i seconds' % self.online_timer)
            self.current_store.close()
            self.init_global_progress()
            if self.job_config.frequency == 'manual':
                self.job_status_running = False
                self.sleep_offline()
            else:
                self.sleep_online()

    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local', success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote', success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        logging.debug('Storing sequences remote %i local %i', self.local_seq, self.remote_seq)
        pickle.dump(dict(
            local=self.local_seq,
            remote=self.remote_seq
        ), open(self.data_base + '/sequences', 'wb'))
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message)

    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq
Ejemplo n.º 3
0
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies())
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path,
                                         job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL,
                        sender=self,
                        channel='status',
                        message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(
                includes=job_config.filters['includes'],
                excludes=job_config.filters['excludes'],
                basepath=job_config.directory,
                job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.configs_path + "/sequences"):
            try:
                sequences = pickle.load(
                    open(self.configs_path + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.configs_path + "/sequences")

        dispatcher.connect(self.handle_transfer_rate_event,
                           signal=TRANSFER_RATE_SIGNAL,
                           sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event,
                           signal=TRANSFER_CALLBACK_SIGNAL,
                           sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False

    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(
            change['bytes_sent']) / float(change["total_size"])

    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (
                float(transfer_rate) +
                self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'status_indexing': 0,
            'queue_length': 0,
            'queue_done': 0.0,
            'queue_bytesize': 0,
            'last_transfer_rate': -1,
            'queue_start_time': time.clock(),
            'total_time': 0
        }

    def update_global_progress(self, compute_queue_size=True):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock(
        ) - self.global_progress['queue_start_time']
        if compute_queue_size:
            self.global_progress[
                "queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress[
                'last_transfer_rate'] > -1 and self.global_progress[
                    'queue_bytesize'] > 0:
            eta = self.global_progress[
                'queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress[
                'queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress[
                'total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        self.update_global_progress(compute_queue_size=False)
        return self.global_progress

    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        where = ''
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.sdk.remove_interrupt()
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.sdk.set_interrupt()
        self.info(_('Job Paused'), toUser='******', channel='status')

    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.sdk.remove_interrupt()
        self.info(_('Job Started'), toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if self.watcher:
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info(_('Job stopping'), toUser='******', channel='status')
        self.sdk.set_interrupt()
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def exit_loop_clean(self, logger):
        self.marked_for_snapshot_pathes = []
        self.current_store.close()
        self.init_global_progress()
        logger.log_state(_('Synchronized'), 'success')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()

    def run(self):
        """
        Start the thread
        """
        logger = EventLogger(self.configs_path)
        very_first = False

        if self.watcher:
            if self.watcher_first_run:

                def status_callback(status):
                    logger.log_state(status, 'sync')

                self.init_global_progress()

                try:
                    self.global_progress['status_indexing'] = 1
                    logger.log_state(
                        _('Checking changes since last launch...'), "sync")
                    very_first = True
                    self.watcher.check_from_snapshot(
                        state_callback=status_callback)
                except DBCorruptedException as e:
                    self.stop()
                    JobsLoader.Instance().clear_job_data(self.job_config.id)
                    logging.error(e)
                    return
                except Exception as e:
                    self.interrupt = True
                    logger.log_state(
                        _('Oops, error while indexing the local folder. Pausing the task.'
                          ), 'error')
                    logging.error(e)

                self.watcher_first_run = False
            self.watcher.start()

        while not self.interrupt:

            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                if very_first:
                    self.global_progress['status_indexing'] = 1

                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (
                        not self.online_status
                        and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    logger.log_state(_('Status: Paused'), "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']))
                    end_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        logger.log_state(
                            _('Status: scheduled for %s') % str(start_time),
                            "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info(
                            "Now triggering synchro as expected at time " +
                            str(start_time))

                if not self.system.check_basepath():
                    log = _(
                        'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry'
                    ) % self.offline_timer
                    logging.error(log)
                    logger.log_state(
                        _('Cannot find local folder, did you disconnect a volume?'
                          ), "error")
                    self.sleep_offline()
                    continue

                if self.watcher:
                    for snap_path in self.marked_for_snapshot_pathes:
                        logging.info(
                            'LOCAL SNAPSHOT : loading snapshot for directory %s'
                            % snap_path)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.watcher.check_from_snapshot(snap_path)

                # Load local and/or remote changes, depending on the direction
                from pydio.job.change_stores import SqliteChangeStore
                self.current_store = SqliteChangeStore(
                    self.configs_path + '/changes.sqlite',
                    self.job_config.filters['includes'],
                    self.job_config.filters['excludes'])
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info(
                            'Loading remote changes with sequence %s' %
                            str(self.remote_seq))
                        if self.remote_seq == 0:
                            logger.log_state(
                                _('Gathering data from remote workspace, this can take a while...'
                                  ), 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(
                            self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except RequestException as ce:
                    if not connection_helper.is_connected_to_internet():
                        error = _(
                            'No Internet connection detected! Waiting for %s seconds to retry'
                        ) % self.offline_timer
                    else:
                        error = _(
                            'Connection to server failed, server is probably down. Waiting %s seconds to retry'
                        ) % self.offline_timer
                    self.marked_for_snapshot_pathes = []
                    logging.error(error)
                    logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (
                        e.message, self.offline_timer)
                    logging.error(error)
                    logger.log_state(
                        _('Error while connecting to remote server (%s)') %
                        e.message, "error")
                    self.marked_for_snapshot_pathes = []
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs(
                    )
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' +
                                 str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(
                        self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1
                if not connection_helper.internet_ok:
                    connection_helper.is_connected_to_internet()

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.update_min_seqs_from_store()
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 1
                logging.info('Reducing changes')
                logger.log_state(
                    _('Merging changes between remote and local, please wait...'
                      ), 'sync')

                logging.debug('Delete Copies')
                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                logging.debug('Dedup changes')
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                if not self.storage_watcher or very_first:
                    logging.debug('Detect unnecessary changes')
                    self.current_store.detect_unnecessary_changes(
                        local_sdk=self.system, remote_sdk=self.sdk)
                self.update_min_seqs_from_store()
                logging.debug('Clearing op and pruning folders moves')
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                logging.debug('Store conflicts')
                store_conflicts = self.current_store.clean_and_detect_conflicts(
                    self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    logger.log_state(_('Conflicts detected, cannot continue!'),
                                     'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 0
                import change_processor
                self.global_progress['queue_length'] = changes_length
                logging.info('Processing %i changes' % changes_length)
                logger.log_state(
                    _('Processing %i changes') % changes_length, "start")
                counter = [1]

                def processor_callback(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.update_current_tasks()
                        self.update_global_progress()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store,
                                         self.job_config, self.system,
                                         self.sdk, self.db_handler,
                                         self.event_logger)
                        proc.process_change()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(0.1)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()

                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except InterruptException as i:
                        raise i
                    except PydioSdkDefaultException as p:
                        raise p
                    except Exception as ex:
                        logging.exception(ex.message)
                        return False
                    return True

                try:
                    if sys.platform.startswith('win'):
                        self.marked_for_snapshot_pathes = list(
                            set(self.current_store.find_modified_parents()) -
                            set(self.marked_for_snapshot_pathes))
                    self.current_store.process_changes_with_callback(
                        processor_callback)
                except InterruptException as iexc:
                    pass
                logger.log_state(
                    _('%i files modified') %
                    self.global_progress['queue_done'], 'success')
                if self.global_progress['queue_done']:
                    logger.log_notif(
                        _('%i files modified') %
                        self.global_progress['queue_done'], 'success')

            except PydioSdkDefaultException as re:
                logging.error(re.message)
                logger.log_state(re.message, 'error')
            except SSLError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('An SSL error happened, please check the logs'), 'error')
            except ProxyError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('A proxy error happened, please check the logs'),
                    'error')
            except TooManyRedirects as rt:
                logging.error(rt.message)
                logger.log_state(_('Connection error: too many redirects'),
                                 'error')
            except ChunkedEncodingError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('Chunked encoding error, please check the logs'),
                    'error')
            except ContentDecodingError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('Content Decoding error, please check the logs'),
                    'error')
            except InvalidSchema as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid schema.'),
                                 'error')
            except InvalidURL as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid URL.'),
                                 'error')
            except Timeout as to:
                logging.error(to)
                logger.log_state(_('Connection timeout, will retry later.'),
                                 'error')
            except RequestException as ree:
                logging.error(ree.message)
                logger.log_state(_('Cannot resolve domain!'), 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]')
                        or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    logger.log_state(
                        _('Unexpected Error: %s') % e.message, 'error')

            logging.debug('Finished this cycle, waiting for %i seconds' %
                          self.online_timer)
            self.exit_loop_clean(logger)
            very_first = False

    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local',
                                                        success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote',
                                                         success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        logging.debug('Storing sequences remote=%i local=%i', self.remote_seq,
                      self.local_seq)
        pickle.dump(dict(local=self.local_seq, remote=self.remote_seq),
                    open(self.configs_path + '/sequences', 'wb'))
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL,
                            sender=self,
                            channel=channel,
                            message=message)

    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq
Ejemplo n.º 4
0
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""
    @pydio_profile
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        sqlite_files = [
            file for file in os.listdir(self.configs_path)
            if file.endswith(".sqlite")
        ]

        for sqlite_file in sqlite_files:
            try:
                exists_and_correct = check_sqlite_file(
                    os.path.join(self.configs_path, sqlite_file))
                if exists_and_correct:
                    logging.info(
                        "Structure and Integrity of SQLite file %s is intact "
                        % str(os.path.join(self.configs_path, sqlite_file)))
            except DBCorruptedException as e:
                logging.debug(
                    "SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync"
                    % (str(os.path.join(self.configs_path,
                                        sqlite_file)), e.message))
                os.unlink(os.path.join(self.configs_path, sqlite_file))
                self.update_sequences_file(0, 0)

        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies(),
            timeout=job_config.timeout)
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path,
                                         job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = job_config.online_timer
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')
        self.wait_for_changes = False  # True when no changes detected in last cycle, can be used to disable websockets
        self.marked_for_snapshot_pathes = []
        self.processing = False  # indicates whether changes are being processed

        dispatcher.send(signal=PUBLISH_SIGNAL,
                        sender=self,
                        channel='status',
                        message='START')
        if job_config.direction != 'down' or (
                self.job_config.direction == 'down'
                and self.job_config.solve != 'remote'):
            self.event_handler = SqlEventHandler(
                includes=job_config.filters['includes'],
                excludes=job_config.filters['excludes'],
                basepath=job_config.directory,
                job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(os.path.join(self.configs_path, "sequences")):
            try:
                with open(os.path.join(self.configs_path, "sequences"),
                          "rb") as f:
                    sequences = pickle.load(f)
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception as e:
                logging.exception(e)
                # Wrong content, remove sequences file.
                os.unlink(os.path.join(self.configs_path, "sequences"))

        dispatcher.connect(self.handle_transfer_rate_event,
                           signal=TRANSFER_RATE_SIGNAL,
                           sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event,
                           signal=TRANSFER_CALLBACK_SIGNAL,
                           sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False
        self.logger = EventLogger(self.configs_path)

    # end init

    def update_sequences_file(self, local_seq, remote_seq):
        with open(os.path.join(self.configs_path, "sequences"), "wb") as f:
            pickle.dump(dict(local=local_seq, remote=remote_seq), f)

    @pydio_profile
    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        # The following 3 lines are a dirty fix, only working for one file at a time size relaining... Better than NaN
        if self.global_progress["queue_bytesize"] < 0:
            self.global_progress["queue_bytesize"] = abs(
                self.global_progress["queue_bytesize"])
        self.global_progress["queue_bytesize"] = max(
            change['total_size'] - change['total_bytes_sent'],
            self.global_progress["queue_bytesize"])
        self.global_progress["queue_done"] += float(
            change['bytes_sent']) / float(change["total_size"])

    @pydio_profile
    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (
                float(transfer_rate) +
                self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    @pydio_profile
    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'status_indexing': 0,
            'queue_length': 0,
            'queue_done': 0.0,
            'queue_bytesize': 0,
            'last_transfer_rate': -1,
            'queue_start_time': time.clock(),
            'total_time': 0
        }

    @pydio_profile
    def update_global_progress(self, compute_queue_size=True):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock(
        ) - self.global_progress['queue_start_time']
        if compute_queue_size:
            self.global_progress[
                "queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress[
                'last_transfer_rate'] > -1 and self.global_progress[
                    'queue_bytesize'] > 0:
            eta = self.global_progress[
                'queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress[
                'queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress[
                'total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        # logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        self.update_global_progress(compute_queue_size=False)
        return self.global_progress

    @pydio_profile
    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    @pydio_profile
    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    def get_websocket_status(self):
        try:
            return self.sdk.waiter and self.sdk.waiter.ws.connected
        except AttributeError:
            pass

    @pydio_profile
    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    @pydio_profile
    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.sdk.remove_interrupt()
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.sdk.set_interrupt()
        self.info(_('Job Paused'), toUser='******', channel='status')

    @pydio_profile
    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.sdk.proxies = ConfigManager.Instance().get_defined_proxies()
        self.sdk.remove_interrupt()
        self.info(_('Job Started'), toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if self.watcher:
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info(_('Job stopping'), toUser='******', channel='status')
        self.sdk.set_interrupt()
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def exit_loop_clean(self, logger):
        self.current_store.close()
        self.processing = False
        self.init_global_progress()
        logger.log_state(_('Synchronized'), 'success')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()

    def exit_loop_error(self, message):
        self.current_store.close()
        self.init_global_progress()
        self.logger.log_state(message, 'error')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()

    @pydio_profile
    def run(self):
        """
        Start the thread
        """
        very_first = False
        self.start_watcher()

        while not self.interrupt:
            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                if very_first:
                    self.global_progress['status_indexing'] = 1

                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (
                        not self.online_status
                        and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    self.logger.log_state(_('Status: Paused'), "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']))
                    end_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        self.logger.log_state(
                            _('Status: scheduled for %s') % str(start_time),
                            "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info(
                            "Now triggering synchro as expected at time " +
                            str(start_time))

                if not self.system.check_basepath():
                    log = _(
                        'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry'
                    ) % self.offline_timer
                    logging.error(log)
                    self.logger.log_state(
                        _('Cannot find local folder, did you disconnect a volume?'
                          ), "error")
                    self.sleep_offline()
                    continue

                # Before starting infinite loop, small check that remote folder still exists
                if not self.sdk.check_basepath():
                    # if it doesn't try to create it
                    self.sdk.remote_folder = os.path.join(
                        "/", self.sdk.remote_folder)
                    try:
                        logging.info("Creating remote directory.")
                        self.sdk.mkdir("")
                        continue
                    except Exception as e:
                        logging.exception(e)
                        self.sleep_offline()
                    if not self.sdk.check_basepath():
                        log = _(
                            'Cannot find remote folder, maybe it was renamed? Sync cannot start, please check the configuration.'
                        )
                        logging.error(log)
                        self.logger.log_state(log, 'error')
                        self.sleep_offline()
                        continue

                if self.watcher:
                    for snap_path in self.marked_for_snapshot_pathes:
                        logging.info(
                            'LOCAL SNAPSHOT : loading snapshot for directory %s'
                            % snap_path)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.watcher.check_from_snapshot(snap_path,
                                                         use_transaction=False)
                    self.marked_for_snapshot_pathes = []

                writewait = .5  # To avoid reading events before they're written (db lock) wait for writing to finish
                totalwait = 0
                while self.event_handler and self.event_handler.locked and totalwait < 60 * 3:
                    logging.info(
                        "Waiting for changes to be written before retrieving remote changes."
                    )
                    if writewait < 5:
                        writewait += .5
                    time.sleep(writewait)
                    totalwait += writewait
                # Load local and/or remote changes, depending on the direction
                self.current_store = SqliteChangeStore(
                    self.configs_path + '/changes.sqlite',
                    self.job_config.filters['includes'],
                    self.job_config.filters['excludes'],
                    self.job_config.poolsize,
                    local_sdk=self.system,
                    remote_sdk=self.sdk,
                    job_config=self.job_config,
                    db_handler=self.db_handler)
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info(
                            'Loading remote changes with sequence {0:s} for job id {1:s}'
                            .format(str(self.remote_seq),
                                    str(self.job_config.id)))
                        if self.remote_seq == 0:
                            self.logger.log_state(
                                _('Gathering data from remote workspace, this can take a while...'
                                  ), 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(
                            self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except RequestException as ce:
                    logging.exception(ce)
                    if not connection_helper.is_connected_to_internet(
                            self.sdk.proxies):
                        error = _(
                            'No Internet connection detected! Waiting for %s seconds to retry'
                        ) % self.offline_timer
                    else:
                        error = _(
                            'Connection to server failed, server is probably down. Waiting %s seconds to retry'
                        ) % self.offline_timer
                    self.marked_for_snapshot_pathes = []
                    logging.error(error)
                    self.logger.log_state(error, "wait")
                    self.current_store.close()
                    self.sleep_offline()
                    continue
                except PydioSdkBasicAuthException as e:
                    self.logger.log_state(_('Authentication Error'), 'error')
                    self.logger.log_notif(_('Authentication Error'), 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (
                        e.message, self.offline_timer)
                    logging.exception(e)
                    self.logger.log_state(
                        _('Error while connecting to remote server (%s)') %
                        e.message, "error")
                    self.marked_for_snapshot_pathes = []
                    self.current_store.close()
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs(
                    )
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down' or (
                        self.job_config.direction == 'down'
                        and self.job_config.solve != 'remote'):
                    logging.info(
                        'Loading local changes with sequence {0:s} for job id {1:s}'
                        .format(str(self.local_seq), str(self.job_config.id)))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(
                        self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1
                if not connection_helper.internet_ok:
                    connection_helper.is_connected_to_internet(
                        self.sdk.proxies)

                changes_length = len(self.current_store)
                if not changes_length:
                    self.processing = False
                    logging.info('No changes detected in ' +
                                 self.job_config.id)
                    self.update_min_seqs_from_store()
                    self.wait_for_changes = True
                    self.exit_loop_clean(self.logger)
                    very_first = False
                    # logging.info("CheckSync of " + self.job_config.id)
                    # self.db_handler.list_non_idle_nodes()
                    if not self.watcher.isAlive() and not self.interrupt:
                        logging.info("File watcher died, restarting...")
                        self.watcher.stop()
                        self.watcher = LocalWatcher(
                            self.job_config.directory,
                            self.configs_path,
                            event_handler=self.event_handler)
                        self.start_watcher()
                    continue

                self.global_progress['status_indexing'] = 1
                logging.info('Reducing changes for ' + self.job_config.id)
                self.logger.log_state(
                    _('Merging changes between remote and local, please wait...'
                      ), 'sync')

                logging.debug('[CMERGER] DETECTING MOVES ' +
                              self.job_config.id)
                self.current_store.detect_win_moves()
                self.update_min_seqs_from_store()

                logging.debug('[CMERGER] REMOVING ECHO ' + self.job_config.id)
                self.current_store.clean_echoes()
                self.update_min_seqs_from_store()

                # We are updating the status to IDLE here for the nodes which has status as NEW
                # The reason is when we create a new sync on the existing folder, some of the files might
                # already be synchronized and we ignore those files while we Dedup changes and those files
                # remain untouched later.
                # So the flow of node status change will occur as follows
                # NEW (as soon as we create a new sync task)
                #  |
                # IDLE (here, just before we reduce changes)
                #  |
                # PENDING (those files/folders which remain after reducing changes and to be actually processed)
                #  |
                # UP / DOWN / CONFLICT (corresponding the operation which occurs)
                #  |
                # IDLE (The final state once upload/ download happens or once when the conflict is resolved)
                self.db_handler.update_bulk_node_status_as_idle()

                logging.debug('[CMERGER] Delete Copies ' + self.job_config.id)
                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                logging.debug('[CMERGER] Dedup changes ' + self.job_config.id)
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                if not self.storage_watcher or very_first:
                    logging.debug('[CMERGER] Detect unnecessary changes ' +
                                  self.ws_id)
                    self.logger.log_state(
                        _('Detecting unnecessary changes...'), 'sync')
                    self.current_store.detect_unnecessary_changes()
                    logging.debug(
                        '[CMERGER] Done detecting unnecessary changes')
                    self.logger.log_state(
                        _('Done detecting unnecessary changes...'), 'sync')
                self.update_min_seqs_from_store()
                logging.debug('Clearing op and pruning folders moves ' +
                              self.job_config.id)
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                logging.debug('Store conflicts ' + self.job_config.id)
                store_conflicts = self.current_store.clean_and_detect_conflicts(
                    self.db_handler)
                if store_conflicts:
                    if self.job_config.solve == 'both':
                        logging.info('Marking nodes SOLVED:KEEPBOTH')
                        for row in self.db_handler.list_conflict_nodes():
                            self.db_handler.update_node_status(
                                row['node_path'], 'SOLVED:KEEPBOTH')
                        store_conflicts = self.current_store.clean_and_detect_conflicts(
                            self.db_handler)
                    if self.job_config.solve == 'local':
                        logging.info('Marking nodes SOLVED:KEEPLOCAL')
                        for row in self.db_handler.list_conflict_nodes():
                            self.db_handler.update_node_status(
                                row['node_path'], 'SOLVED:KEEPLOCAL')
                        store_conflicts = self.current_store.clean_and_detect_conflicts(
                            self.db_handler)
                    if self.job_config.solve == 'remote':
                        logging.info('Marking nodes SOLVED:KEEPREMOTE')
                        for row in self.db_handler.list_conflict_nodes():
                            self.db_handler.update_node_status(
                                row['node_path'], 'SOLVED:KEEPREMOTE')
                        store_conflicts = self.current_store.clean_and_detect_conflicts(
                            self.db_handler)

                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    self.logger.log_state(
                        _('Conflicts detected, cannot continue!'), 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    self.logger.log_notif(
                        _('Conflicts detected, cannot continue!'), 'error')
                    continue

                if self.job_config.direction == 'down' and self.job_config.solve != 'remote':
                    self.current_store.remove_based_on_location('local')
                    self.update_min_seqs_from_store()

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected for ' +
                                 self.job_config.id)
                    self.exit_loop_clean(self.logger)
                    very_first = False
                    continue

                self.current_store.update_pending_status(
                    self.db_handler, self.local_seq)

                self.global_progress['status_indexing'] = 0
                import change_processor
                self.global_progress['queue_length'] = changes_length
                logging.info('Processing %i changes' % changes_length)
                self.logger.log_state(
                    _('Processing %i changes') % changes_length, "start")
                counter = [1]

                def processor_callback(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.update_current_tasks()
                        self.update_global_progress()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store,
                                         self.job_config, self.system,
                                         self.sdk, self.db_handler,
                                         self.event_logger)
                        proc.process_change()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(0.01)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()

                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except InterruptException as i:
                        raise i
                    except PydioSdkDefaultException as p:
                        raise p
                    except Exception as ex:
                        logging.exception(ex)
                        return False
                    return True

                try:
                    if sys.platform.startswith('win'):
                        self.marked_for_snapshot_pathes = list(
                            set(self.current_store.find_modified_parents()) -
                            set(self.marked_for_snapshot_pathes))
                    if not self.processing:
                        self.processing = True
                        self.current_store.process_changes_with_callback(
                            processor_callback)
                        # logging.info("Updating seqs")
                        self.current_store.process_pending_changes()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(
                            0.05)  # Allow for changes to be noticeable in UI
                        # logging.info("DONE WITH CHANGES")
                        self.processing = False

                except InterruptException as iexc:
                    pass
                self.logger.log_state(
                    _('%i files modified') %
                    self.global_progress['queue_done'], 'success')
                if self.global_progress['queue_done']:
                    self.logger.log_notif(
                        _('%i files modified') %
                        self.global_progress['queue_done'], 'success')

                very_first = False
                self.exit_loop_clean(self.logger)
                continue

            except PydioSdkDefaultException as re:
                logging.error(re.message)
                self.logger.log_state(re.message, 'error')
                self.sleep_online()
            except SSLError as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('An SSL error happened, please check the logs'), 'error')
                self.sleep_offline()
            except ProxyError as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('A proxy error happened, please check the logs'),
                    'error')
                self.sleep_offline()
            except TooManyRedirects as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('Connection error: too many redirects'), 'error')
                self.sleep_offline()
            except ChunkedEncodingError as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('Chunked encoding error, please check the logs'),
                    'error')
                self.sleep_offline()
            except ContentDecodingError as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('Content Decoding error, please check the logs'),
                    'error')
                self.sleep_online()
            except InvalidSchema as rt:
                logging.error(rt.message)
                self.logger.log_state(
                    _('Http connection error: invalid schema.'), 'error')
                self.sleep_offline()
            except InvalidURL as rt:
                logging.error(rt.message)
                self.logger.log_state(_('Http connection error: invalid URL.'),
                                      'error')
                self.sleep_offline()
            except Timeout as to:
                logging.error(to)
                self.logger.log_state(
                    _('Connection timeout, will retry later.'), 'error')
                self.sleep_offline()
            except RequestException as ree:
                logging.error(ree.message)
                self.logger.log_state(_('Cannot resolve domain!'), 'error')
                self.sleep_offline()
            except PydioSdkBasicAuthException as e:
                self.logger.log_state(_('Authentication Error'), 'error')
                self.logger.log_notif(_('Authentication Error'), 'error')
                self.sleep_offline()
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]')
                        or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    self.logger.log_state(
                        _('Unexpected Error: %s') % e.message, 'error')
                else:
                    logging.exception(e)
                self.sleep_offline()
            self.current_store.close()
            self.processing = False
            logging.debug('Finished this cycle, waiting for %i seconds' %
                          self.online_timer)
            very_first = False

    def start_watcher(self):
        if self.watcher:
            if self.watcher_first_run:

                def status_callback(status):
                    self.logger.log_state(status, 'sync')

                self.init_global_progress()

                try:
                    self.global_progress['status_indexing'] = 1
                    self.logger.log_state(
                        _('Checking changes since last launch...'), "sync")
                    very_first = True
                    self.db_handler.update_bulk_node_status_as_idle()
                    self.watcher.check_from_snapshot(
                        state_callback=status_callback)
                except DBCorruptedException as e:
                    self.stop()
                    JobsLoader.Instance().clear_job_data(self.job_config.id)
                    logging.error(e)
                    return
                except Exception as e:
                    logging.exception(e)
                    self.interrupt = True
                    self.logger.log_state(
                        _('Oops, error while indexing the local folder. Pausing the task.'
                          ), 'error')
                    logging.error(e)

                self.watcher_first_run = False
            self.watcher.start()

    @pydio_profile
    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local',
                                                        success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote',
                                                         success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        # logging.info('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq)
        self.update_sequences_file(self.local_seq, self.remote_seq)
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    @pydio_profile
    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL,
                            sender=self,
                            channel=channel,
                            message=message)

    @pydio_profile
    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        # if self.wait_for_changes:
        #     timereq = time.time()
        #     try:
        #         self.sdk.websocket_connect(last_seq, str(self.job_config.id))
        #
        #         self.sdk.waiter.should_fetch_changes = False
        #         while not self.sdk.waiter.should_fetch_changes and not self.interrupt:
        #             time.sleep(2)
        #             # these break only after one run
        #             if self.local_seq != self.db_handler.get_max_seq():
        #                 # There was a local change
        #                 break
        #             if not self.sdk.waiter.ws.connected:
        #                 # websocket disconnected
        #                 break
        #     except Exception as e:
        #         logging.exception(e)
        #
        #     if time.time() - timereq > 10:  # if last_seq was updated more than 10s ago, update it
        #         last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq