def __init__(self, job_config, job_data_path, pub_socket=False):
        threading.Thread.__init__(self)
        self.data_base = job_data_path
        self.job_config = job_config
        self.progress = 0

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.data_base, job_config.directory)
        self.interrupt = False
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        if pub_socket:
            self.pub_socket = pub_socket
            self.info('Job Started', toUser='******', channel='status')

        if os.path.exists(self.data_base + "/sequences"):
            sequences = pickle.load(open(self.data_base + "/sequences", "rb"))
            self.remote_seq = sequences['remote']
            self.local_seq = sequences['local']

        if job_config.direction != 'down':
            self.watcher = LocalWatcher(job_config.directory,
                                        job_config.filters['includes'],
                                        job_config.filters['excludes'],
                                        job_data_path)
        dispatcher.connect( self.handle_progress_event, signal=PROGRESS_SIGNAL, sender=dispatcher.Any )
Beispiel #2
0
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies())
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path,
                                         job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL,
                        sender=self,
                        channel='status',
                        message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(
                includes=job_config.filters['includes'],
                excludes=job_config.filters['excludes'],
                basepath=job_config.directory,
                job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.configs_path + "/sequences"):
            try:
                sequences = pickle.load(
                    open(self.configs_path + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.configs_path + "/sequences")

        dispatcher.connect(self.handle_transfer_rate_event,
                           signal=TRANSFER_RATE_SIGNAL,
                           sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event,
                           signal=TRANSFER_CALLBACK_SIGNAL,
                           sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False
Beispiel #3
0
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies())
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path,
                                         job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL,
                        sender=self,
                        channel='status',
                        message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(
                includes=job_config.filters['includes'],
                excludes=job_config.filters['excludes'],
                basepath=job_config.directory,
                job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.configs_path + "/sequences"):
            try:
                sequences = pickle.load(
                    open(self.configs_path + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.configs_path + "/sequences")

        dispatcher.connect(self.handle_transfer_rate_event,
                           signal=TRANSFER_RATE_SIGNAL,
                           sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event,
                           signal=TRANSFER_CALLBACK_SIGNAL,
                           sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False

    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(
            change['bytes_sent']) / float(change["total_size"])

    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (
                float(transfer_rate) +
                self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'status_indexing': 0,
            'queue_length': 0,
            'queue_done': 0.0,
            'queue_bytesize': 0,
            'last_transfer_rate': -1,
            'queue_start_time': time.clock(),
            'total_time': 0
        }

    def update_global_progress(self, compute_queue_size=True):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock(
        ) - self.global_progress['queue_start_time']
        if compute_queue_size:
            self.global_progress[
                "queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress[
                'last_transfer_rate'] > -1 and self.global_progress[
                    'queue_bytesize'] > 0:
            eta = self.global_progress[
                'queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress[
                'queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress[
                'total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        self.update_global_progress(compute_queue_size=False)
        return self.global_progress

    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        where = ''
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.sdk.remove_interrupt()
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.sdk.set_interrupt()
        self.info(_('Job Paused'), toUser='******', channel='status')

    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.sdk.remove_interrupt()
        self.info(_('Job Started'), toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if self.watcher:
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info(_('Job stopping'), toUser='******', channel='status')
        self.sdk.set_interrupt()
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def exit_loop_clean(self, logger):
        self.marked_for_snapshot_pathes = []
        self.current_store.close()
        self.init_global_progress()
        logger.log_state(_('Synchronized'), 'success')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()

    def run(self):
        """
        Start the thread
        """
        logger = EventLogger(self.configs_path)
        very_first = False

        if self.watcher:
            if self.watcher_first_run:

                def status_callback(status):
                    logger.log_state(status, 'sync')

                self.init_global_progress()

                try:
                    self.global_progress['status_indexing'] = 1
                    logger.log_state(
                        _('Checking changes since last launch...'), "sync")
                    very_first = True
                    self.watcher.check_from_snapshot(
                        state_callback=status_callback)
                except DBCorruptedException as e:
                    self.stop()
                    JobsLoader.Instance().clear_job_data(self.job_config.id)
                    logging.error(e)
                    return
                except Exception as e:
                    self.interrupt = True
                    logger.log_state(
                        _('Oops, error while indexing the local folder. Pausing the task.'
                          ), 'error')
                    logging.error(e)

                self.watcher_first_run = False
            self.watcher.start()

        while not self.interrupt:

            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                if very_first:
                    self.global_progress['status_indexing'] = 1

                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (
                        not self.online_status
                        and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    logger.log_state(_('Status: Paused'), "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']))
                    end_time = datetime.time(
                        int(self.job_config.start_time['h']),
                        int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        logger.log_state(
                            _('Status: scheduled for %s') % str(start_time),
                            "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info(
                            "Now triggering synchro as expected at time " +
                            str(start_time))

                if not self.system.check_basepath():
                    log = _(
                        'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry'
                    ) % self.offline_timer
                    logging.error(log)
                    logger.log_state(
                        _('Cannot find local folder, did you disconnect a volume?'
                          ), "error")
                    self.sleep_offline()
                    continue

                if self.watcher:
                    for snap_path in self.marked_for_snapshot_pathes:
                        logging.info(
                            'LOCAL SNAPSHOT : loading snapshot for directory %s'
                            % snap_path)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.watcher.check_from_snapshot(snap_path)

                # Load local and/or remote changes, depending on the direction
                from pydio.job.change_stores import SqliteChangeStore
                self.current_store = SqliteChangeStore(
                    self.configs_path + '/changes.sqlite',
                    self.job_config.filters['includes'],
                    self.job_config.filters['excludes'])
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info(
                            'Loading remote changes with sequence %s' %
                            str(self.remote_seq))
                        if self.remote_seq == 0:
                            logger.log_state(
                                _('Gathering data from remote workspace, this can take a while...'
                                  ), 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(
                            self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except RequestException as ce:
                    if not connection_helper.is_connected_to_internet():
                        error = _(
                            'No Internet connection detected! Waiting for %s seconds to retry'
                        ) % self.offline_timer
                    else:
                        error = _(
                            'Connection to server failed, server is probably down. Waiting %s seconds to retry'
                        ) % self.offline_timer
                    self.marked_for_snapshot_pathes = []
                    logging.error(error)
                    logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (
                        e.message, self.offline_timer)
                    logging.error(error)
                    logger.log_state(
                        _('Error while connecting to remote server (%s)') %
                        e.message, "error")
                    self.marked_for_snapshot_pathes = []
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs(
                    )
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' +
                                 str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(
                        self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1
                if not connection_helper.internet_ok:
                    connection_helper.is_connected_to_internet()

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.update_min_seqs_from_store()
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 1
                logging.info('Reducing changes')
                logger.log_state(
                    _('Merging changes between remote and local, please wait...'
                      ), 'sync')

                logging.debug('Delete Copies')
                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                logging.debug('Dedup changes')
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                if not self.storage_watcher or very_first:
                    logging.debug('Detect unnecessary changes')
                    self.current_store.detect_unnecessary_changes(
                        local_sdk=self.system, remote_sdk=self.sdk)
                self.update_min_seqs_from_store()
                logging.debug('Clearing op and pruning folders moves')
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                logging.debug('Store conflicts')
                store_conflicts = self.current_store.clean_and_detect_conflicts(
                    self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    logger.log_state(_('Conflicts detected, cannot continue!'),
                                     'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 0
                import change_processor
                self.global_progress['queue_length'] = changes_length
                logging.info('Processing %i changes' % changes_length)
                logger.log_state(
                    _('Processing %i changes') % changes_length, "start")
                counter = [1]

                def processor_callback(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.update_current_tasks()
                        self.update_global_progress()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store,
                                         self.job_config, self.system,
                                         self.sdk, self.db_handler,
                                         self.event_logger)
                        proc.process_change()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(0.1)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()

                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except InterruptException as i:
                        raise i
                    except PydioSdkDefaultException as p:
                        raise p
                    except Exception as ex:
                        logging.exception(ex.message)
                        return False
                    return True

                try:
                    if sys.platform.startswith('win'):
                        self.marked_for_snapshot_pathes = list(
                            set(self.current_store.find_modified_parents()) -
                            set(self.marked_for_snapshot_pathes))
                    self.current_store.process_changes_with_callback(
                        processor_callback)
                except InterruptException as iexc:
                    pass
                logger.log_state(
                    _('%i files modified') %
                    self.global_progress['queue_done'], 'success')
                if self.global_progress['queue_done']:
                    logger.log_notif(
                        _('%i files modified') %
                        self.global_progress['queue_done'], 'success')

            except PydioSdkDefaultException as re:
                logging.error(re.message)
                logger.log_state(re.message, 'error')
            except SSLError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('An SSL error happened, please check the logs'), 'error')
            except ProxyError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('A proxy error happened, please check the logs'),
                    'error')
            except TooManyRedirects as rt:
                logging.error(rt.message)
                logger.log_state(_('Connection error: too many redirects'),
                                 'error')
            except ChunkedEncodingError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('Chunked encoding error, please check the logs'),
                    'error')
            except ContentDecodingError as rt:
                logging.error(rt.message)
                logger.log_state(
                    _('Content Decoding error, please check the logs'),
                    'error')
            except InvalidSchema as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid schema.'),
                                 'error')
            except InvalidURL as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid URL.'),
                                 'error')
            except Timeout as to:
                logging.error(to)
                logger.log_state(_('Connection timeout, will retry later.'),
                                 'error')
            except RequestException as ree:
                logging.error(ree.message)
                logger.log_state(_('Cannot resolve domain!'), 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]')
                        or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    logger.log_state(
                        _('Unexpected Error: %s') % e.message, 'error')

            logging.debug('Finished this cycle, waiting for %i seconds' %
                          self.online_timer)
            self.exit_loop_clean(logger)
            very_first = False

    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local',
                                                        success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote',
                                                         success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        logging.debug('Storing sequences remote=%i local=%i', self.remote_seq,
                      self.local_seq)
        pickle.dump(dict(local=self.local_seq, remote=self.remote_seq),
                    open(self.configs_path + '/sequences', 'wb'))
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL,
                            sender=self,
                            channel=channel,
                            message=message)

    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""

    def __init__(self, job_config, job_data_path, pub_socket=False):
        threading.Thread.__init__(self)
        self.data_base = job_data_path
        self.job_config = job_config
        self.progress = 0

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.data_base, job_config.directory)
        self.interrupt = False
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        if pub_socket:
            self.pub_socket = pub_socket
            self.info('Job Started', toUser='******', channel='status')

        if os.path.exists(self.data_base + "/sequences"):
            sequences = pickle.load(open(self.data_base + "/sequences", "rb"))
            self.remote_seq = sequences['remote']
            self.local_seq = sequences['local']

        if job_config.direction != 'down':
            self.watcher = LocalWatcher(job_config.directory,
                                        job_config.filters['includes'],
                                        job_config.filters['excludes'],
                                        job_data_path)
        dispatcher.connect( self.handle_progress_event, signal=PROGRESS_SIGNAL, sender=dispatcher.Any )

    def handle_progress_event(self, sender, progress):
        self.info('Job progress is %s' % progress)

    def is_running(self):
        return self.job_status_running

    def pause(self):
        self.job_status_running = False
        self.info('Job Paused', toUser='******', channel='status')

    def resume(self):
        self.job_status_running = True
        self.info('Job Started', toUser='******', channel='status')

    def stop(self):
        if hasattr(self, 'watcher'):
            self.watcher.stop()
        self.interrupt = True

    def run(self):
        if hasattr(self, 'watcher'):
            self.watcher.start()

        while not self.interrupt:
            try:

                if not self.job_status_running:
                    time.sleep(self.online_timer)
                    continue

                if not self.system.check_basepath():
                    logging.info('Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' % self.offline_timer)
                    time.sleep(self.offline_timer)
                    continue

                # Load local and/or remote changes, depending on the direction
                local_changes = dict(data=dict(), path_to_seqs=dict())
                remote_changes = dict(data=dict(), path_to_seqs=dict())
                try:
                    if self.job_config.direction != 'up':
                        logging.info('Loading remote changes with sequence ' + str(self.remote_seq))
                        self.remote_target_seq = self.get_remote_changes(self.remote_seq, remote_changes)
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except ConnectionError as ce:
                    logging.info('No connection detected, waiting %s seconds to retry' % self.offline_timer)
                    self.online_status = False
                    time.sleep(self.offline_timer)
                    continue
                except Exception as e:
                    logging.info('Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer))
                    self.online_status = False
                    time.sleep(self.offline_timer)
                    continue
                self.online_status = True

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' + str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes(self.local_seq, local_changes)
                else:
                    self.local_target_seq = 1

                self.local_seqs = local_changes['data'].keys() #map(lambda x:x['seq'], local_changes)
                self.remote_seqs = remote_changes['data'].keys() #map(lambda x:x['seq'], remote_changes)
                logging.info('Reducing changes')
                conflicts = []
                changes = self.reduce_changes(local_changes, remote_changes, conflicts)
                if len(conflicts):
                    logging.info('Conflicts detected, cannot continue!')
                    self.store_conflicts(conflicts)
                    self.job_status_running = False
                    time.sleep(self.offline_timer)
                    continue

                if len(changes):
                    logging.info('Processing %i changes' % len(changes))
                    i = 1
                    for change in changes:
                        try:
                            self.process_change(change)
                            self.remove_seq(change['seq'], change['location'])
                        except ProcessException as pe:
                            logging.error(pe.message)
                        except OSError as e:
                            logging.error(e.message)
                        #progress_percent = 100 * i / len(changes)
                        progress_percent = "{0:.2f}%".format(float(i)/len(changes) * 100)
                        dispatcher.send(signal=PROGRESS_SIGNAL, sender=self, progress=progress_percent)
                        #self.pub_socket.send_string("sync" + ' ' + str(i) + "/" + str(len(changes)) + " changes done : " + str(progressPercent) + "%")
                        i += 1
                        if self.interrupt:
                            break
                        time.sleep(0.01)
                else:
                    logging.info('No changes detected')
            except OSError as e:
                logging.error('Type Error! ')
            logging.info('Finished this cycle, waiting for %i seconds' % self.online_timer)
            time.sleep(self.online_timer)

    def remove_seq(self, seq_id, location):
        if location == 'local':
            self.local_seqs.remove(seq_id)
            if len(self.local_seqs):
                self.local_seq = min(min(self.local_seqs), self.local_target_seq)
            else:
                self.local_seq = self.local_target_seq
        else:
            self.remote_seqs.remove(seq_id)
            if len(self.remote_seqs):
                self.remote_seq = min(min(self.remote_seqs), self.remote_target_seq)
            else:
                self.remote_seq = self.remote_target_seq
        pickle.dump(dict(
            local=self.local_seq,
            remote=self.remote_seq
        ), open(self.data_base + '/sequences', 'wb'))

    def stat_path(self, path, location, stats=None, with_hash=False):
        try:
            if stats:
                return stats[path]
        except KeyError:
            pass

        if location == 'remote':
            return self.sdk.stat(path, with_hash)
        else:
            return self.system.stat(path, with_hash=True)

    def ping_remote(self):
        test = self.sdk.stat('/')
        return (test != False)

    def filter_change(self, item, my_stat=None, other_stats=None):

        location = item['location']
        opposite = 'local' if item['location'] == 'remote' else 'remote'
        res = False
        if item['type'] == 'create' or item['type'] == 'content':
            # If it does not exist on remote side, skip
            test_stat = self.stat_path(item['node']['node_path'], location=opposite, stats=other_stats, with_hash=True)
            if not test_stat:
                return False
            # If it exists but is a directory, it won't change
            if item['node']['md5'] == 'directory':
                res = True
            # If it exists and has same size, ok
            elif test_stat['size'] == item['node']['bytesize'] and 'hash' in test_stat and test_stat['hash'] == item['node']['md5']:
                res = True
        elif item['type'] == 'delete':
            # Shall we really delete it?
            loc_stat = self.stat_path(item['source'], location=location, stats=my_stat)
            if loc_stat:
                res = True
            # Shall we delete if already absent? no!
            test_stat = self.stat_path(item['source'], location=opposite, stats=other_stats)
            if not test_stat:
                res = True
        else:#MOVE
            source_stat = self.stat_path(item['source'], location=opposite, stats=other_stats)
            target_stat = self.stat_path(item['target'], location=opposite, stats=other_stats, with_hash=True)
            if not target_stat or source_stat:
                return False
            elif item['node']['md5'] == 'directory':
                res = True
            elif target_stat['size'] == item['node']['bytesize'] and 'hash' in target_stat and target_stat['hash'] == item['node']['md5']:
                res = True

        if res:
            if item['type'] != 'delete':
                logging.info('['+location+'] Filtering out ' + item['type'] + ': ' + item['node']['node_path'])
            else:
                logging.info('['+location+'] Filtering out ' + item['type'] + ' ' + item['source'])
            self.remove_seq(item['seq'], location)
            return True

        return False

    def changes_sorter(self, i1, i2):
        # no node: delete on top
        if not i1['node']:
            return -1
        if not i2['node']:
            return 1

        # directory
        if i1['node']['md5'] == 'directory' and i2['node']['md5'] == 'directory':
            return cmp(i1['node']['node_path'], i2['node']['node_path'])

        if i1['node']['md5'] == 'directory':
            return -1
        if i2['node']['md5'] == 'directory':
            return 1

        # sort on path otherwise
        return cmp(i1['node']['node_path'], i2['node']['node_path'])

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser and self.pub_socket:
            self.pub_socket.send_string(channel + "/" + message)

    def process_localMKDIR(self, path):
        message = path + ' <============ MKDIR'
        os.makedirs(self.basepath + path)
        self.info(message, 'New folder created at '+ path )

    def process_remoteMKDIR(self, path):
        message = 'MKDIR ============> ' + path
        self.info(message, toUser=False)
        self.sdk.mkdir(path)

    def process_localDELETE(self, path):
        if os.path.isdir(self.basepath + path):
            self.system.rmdir(path)
            self.info(path + ' <============ DELETE', 'Deleted folder ' + path)
        elif os.path.isfile(self.basepath + path):
            os.unlink(self.basepath + path)
            self.info(path + ' <============ DELETE', 'Deleted file ' + path)

    def process_remoteDELETE(self, path):
        self.sdk.delete(path)
        self.info('DELETE ============> ' + path, False)

    def process_localMOVE(self, source, target):
        if os.path.exists(self.basepath + source):
            if not os.path.exists(self.basepath + os.path.dirname(target)):
                os.makedirs(self.basepath + os.path.dirname(target))
            os.rename(self.basepath + source, self.basepath + target)
            self.info(source + ' to ' + target + ' <============ MOVE', 'Moved ' + source + ' to ' + target)
            return True
        return False

    def process_remoteMOVE(self, source, target):
        self.info('MOVE ============> ' + source + ' to ' + target, toUser=False)
        self.sdk.rename(source, target)

    def process_DOWNLOAD(self, path):
        self.db_handler.update_node_status(path, 'DOWN')
        self.sdk.download(path, self.basepath + path)
        self.db_handler.update_node_status(path, 'IDLE')
        self.info(path + ' <=============== ' + path, 'File ' + path + ' downloaded from server')

    def process_UPLOAD(self, path):
        self.db_handler.update_node_status(path, 'UP')
        self.sdk.upload(self.basepath+path, self.system.stat(path), path)
        self.db_handler.update_node_status(path, 'IDLE')
        self.info(path + ' ===============> ' + path, 'File ' + path + ' uploaded to server')


    def process_change(self, item):

        location = item['location']
        if self.direction == 'up' and location == 'remote':
            return
        if self.direction == 'down' and location == 'local':
            return

        if item['type'] == 'create' or item['type'] == 'content':
            if item['node']['md5'] == 'directory':
                if item['node']['node_path']:
                    logging.info('[' + location + '] Create folder ' + item['node']['node_path'])
                    if location == 'remote':
                        self.process_localMKDIR(item['node']['node_path'])
                        self.db_handler.buffer_real_operation(item['type'], 'NULL', item['node']['node_path'])
                    else:
                        self.process_remoteMKDIR(item['node']['node_path'])
            else:
                if item['node']['node_path']:
                    if location == 'remote':
                        self.process_DOWNLOAD(item['node']['node_path'])
                        if item['type'] == 'create':
                            self.db_handler.buffer_real_operation(item['type'], 'NULL', item['node']['node_path'])
                        else:
                            self.db_handler.buffer_real_operation(item['type'], item['node']['node_path'], item['node']['node_path'])
                    else:
                        self.process_UPLOAD(item['node']['node_path'])

        elif item['type'] == 'delete':
            logging.info('[' + location + '] Should delete ' + item['source'])
            if location == 'remote':
                self.process_localDELETE(item['source'])
                self.db_handler.buffer_real_operation('delete', item['source'], 'NULL')
            else:
                self.process_remoteDELETE(item['source'])

        else:
            logging.info('[' + location + '] Should move ' + item['source'] + ' to ' + item['target'])
            if location == 'remote':
                if os.path.exists(self.basepath + item['source']):
                    if self.process_localMOVE(item['source'], item['target']):
                        self.db_handler.buffer_real_operation(item['type'], item['source'], item['target'])
                else:
                    if item["node"]["md5"] == "directory":
                        logging.debug('Cannot find folder to move, switching to creation')
                        self.process_localMKDIR(item['target'])
                    else:
                        logging.debug('Cannot find source, switching to DOWNLOAD')
                        self.process_DOWNLOAD(item['target'])
                    self.db_handler.buffer_real_operation('create', 'NULL', item['target'])
            else:
                if self.sdk.stat(item['source']):
                    self.process_remoteMOVE(item['source'], item['target'])
                elif item['node']['md5'] != 'directory':
                    logging.debug('Cannot find source, switching to UPLOAD')
                    self.process_UPLOAD(item['target'])

    def reduce_changes(self, local_changes=dict(), remote_changes=dict(), conflicts=[]):

        rchanges = remote_changes['data'].values()
        lchanges = local_changes['data'].values()

        for seq, item in local_changes['data'].items():
            pathes = []
            if item['source'] != 'NULL':
                pathes.append(item['source'])
            if item['target'] != 'NULL':
                pathes.append(item['target'])
            # search these pathes in remote_changes
            remote_sequences = []
            for x in pathes:
                remote_sequences = remote_sequences + remote_changes['path_to_seqs'].setdefault(x, [])
            for seq_id in remote_sequences:
                otheritem = remote_changes['data'][seq_id]
                try:
                    if not (item['type'] == otheritem['type']):
                        continue
                    if not item['node'] and not otheritem['node'] and (item['source'] == otheritem['source']):
                        logging.debug('Reconciliation sequence for change (source)'+item['source'])
                        lchanges.remove(item)
                        rchanges.remove(otheritem)
                        self.remove_seq(item['seq'], 'local')
                        self.remove_seq(otheritem['seq'], 'remote')
                        break

                    if not (os.path.normpath(item['node']['node_path']) == os.path.normpath(otheritem['node']['node_path'])):
                        continue
                    if item['node']['bytesize'] == otheritem['node']['bytesize'] and item['node']['md5'] == otheritem['node']['md5']:
                        logging.debug('Reconciliation sequence for change (node)'+item['node']['node_path'])
                        lchanges.remove(item)
                        rchanges.remove(otheritem)
                        self.remove_seq(item['seq'], 'local')
                        self.remove_seq(otheritem['seq'], 'remote')
                        break
                except Exception as e:
                    pass

        test_stats = list(set(map(lambda it: it['source'] if it['source'] != 'NULL' else it['target'], lchanges)))
        remote_stats = None
        if len(test_stats):
            remote_stats = self.sdk.bulk_stat(test_stats, with_hash=True)

        rchanges = filter(lambda it: not self.filter_change(it, remote_stats, None), rchanges)
        lchanges = filter(lambda it: not self.filter_change(it, None, remote_stats), lchanges)

        last_ops = self.db_handler.get_last_operations()

        new_rchanges = []

        for item in lchanges:
            ignore = False
            for last in last_ops:
                if last['type'] == item['type'] and last['source'] == item['source'] and last['target'] == item['target']:
                    logging.info('IGNORING, RECENT MOVE FROM SERVER', last)
                    ignore = True
                    break
            if ignore:
                continue
            conflict = False
            for rItem in rchanges:
                if (not item['node'] and not rItem['node'] and rItem['source'] == rItem['source']) or (item['node'] and rItem['node'] and item['node']['node_path'] and rItem['node']['node_path'] and os.path.normpath(item['node']['node_path']) == os.path.normpath(rItem['node']['node_path'])):
                    # Seems there is a conflict - check
                    c_path = item['source']
                    if item['node']:
                        c_path = item['node']['node_path']
                    status = self.db_handler.get_node_status(c_path)
                    if status == 'SOLVED:KEEPLOCAL':
                        rchanges.remove(rItem)
                    elif status == 'SOLVED:KEEPREMOTE':
                        conflict = True
                    else:
                        conflict = True
                        rchanges.remove(rItem)
                        conflicts.append({'local':item,'remote':rItem})
                    break
            if conflict:
                continue
            new_rchanges.append(item)

        self.db_handler.clear_operations_buffer()

        # Sort to make sure directory operations are applied first
        rchanges = sorted(rchanges + new_rchanges, cmp=self.changes_sorter)

        # Prune changes : for DELETE and MOVE of Dir, remove all childrens
        toremove = []
        for i in range(len(rchanges)):
            ch = rchanges[i]
            if ch['type'] == 'path' and not ch['source'] == 'NULL' and not ch['target'] == 'NULL' and ch['node']['md5'] == 'directory':
                if i < len(rchanges)-1:
                    for j in range(i+1,len(rchanges)):
                        if rchanges[j]['source'] and rchanges[j]['type'] == 'path' and rchanges[j]['source'].startswith(ch['source']+'/'):
                            toremove.append(rchanges[j])

        if len(toremove):
            for r in toremove:
                if r in rchanges: rchanges.remove(r)

        return rchanges

    def store_conflicts(self, conflicts):
        for conflict in conflicts:
            local = conflict["local"]
            remote = conflict["remote"]
            if local["node"]:
                path = local["node"]["node_path"]
            elif local["source"]:
                path = local["source"]
            else:
                path = local["target"]
            self.db_handler.update_node_status(path, 'CONFLICT', pickle.dumps(remote))

    def get_remote_changes(self, seq_id, changes=dict()):

        logging.debug('Remote sequence ' + str(seq_id))
        data = self.sdk.changes(seq_id)
        for (i, item) in enumerate(data['changes']):
            item['location'] = 'remote'
            key = item['source'] if item['source'] != 'NULL' else item['target']
            if not key in changes['path_to_seqs']:
                changes['path_to_seqs'][key] = []
            changes['path_to_seqs'][key].append(item['seq'])
            changes['data'][item['seq']] = item

        return data['last_seq']
Beispiel #5
0
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        sqlite_files = [file for file in os.listdir(self.configs_path) if file.endswith(".sqlite")]

        from pydio.utils.check_sqlite import check_sqlite_file
        for sqlite_file in sqlite_files:
            try:
                exists_and_correct = check_sqlite_file(self.configs_path + "/" + sqlite_file)
                if exists_and_correct:
                    logging.info("Structure and Integrity of SQLite file %s is intact " % str(self.configs_path + "/" + sqlite_file))
            except DBCorruptedException as e:
                logging.debug("SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync"
                              % (str(self.configs_path + "/" + sqlite_file),e.message))
                os.unlink(self.configs_path + "/" + sqlite_file)
                self.update_sequences_file(0, 0)

        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies()
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.configs_path + "/sequences"):
            try:
                sequences = pickle.load(open(self.configs_path + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.configs_path + "/sequences")

        dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False
Beispiel #6
0
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""

    @pydio_profile
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        sqlite_files = [file for file in os.listdir(self.configs_path) if file.endswith(".sqlite")]

        from pydio.utils.check_sqlite import check_sqlite_file
        for sqlite_file in sqlite_files:
            try:
                exists_and_correct = check_sqlite_file(self.configs_path + "/" + sqlite_file)
                if exists_and_correct:
                    logging.info("Structure and Integrity of SQLite file %s is intact " % str(self.configs_path + "/" + sqlite_file))
            except DBCorruptedException as e:
                logging.debug("SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync"
                              % (str(self.configs_path + "/" + sqlite_file),e.message))
                os.unlink(self.configs_path + "/" + sqlite_file)
                self.update_sequences_file(0, 0)

        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies()
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.configs_path + "/sequences"):
            try:
                sequences = pickle.load(open(self.configs_path + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.configs_path + "/sequences")

        dispatcher.connect(self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False

    def update_sequences_file(self, local_seq, remote_seq):
        pickle.dump(dict(
            local=local_seq,
            remote=remote_seq
        ), open(self.configs_path + '/sequences', 'wb'))

    @pydio_profile
    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"])

    @pydio_profile
    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    @pydio_profile
    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'status_indexing'   :0,
            'queue_length'      :0,
            'queue_done'        :0.0,
            'queue_bytesize'    :0,
            'last_transfer_rate':-1,
            'queue_start_time'  :time.clock(),
            'total_time'        :0
        }

    @pydio_profile
    def update_global_progress(self, compute_queue_size=True):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time']
        if compute_queue_size:
            self.global_progress["queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0:
            eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        self.update_global_progress(compute_queue_size=False)
        return self.global_progress

    @pydio_profile
    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    @pydio_profile
    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    @pydio_profile
    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        where = ''
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    @pydio_profile
    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.sdk.remove_interrupt()
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.sdk.set_interrupt()
        self.info(_('Job Paused'), toUser='******', channel='status')

    @pydio_profile
    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.sdk.remove_interrupt()
        self.info(_('Job Started'), toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if self.watcher:
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info(_('Job stopping'), toUser='******', channel='status')
        self.sdk.set_interrupt()
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def exit_loop_clean(self, logger):
        self.marked_for_snapshot_pathes = []
        self.current_store.close()
        self.init_global_progress()
        logger.log_state(_('Synchronized'), 'success')
        if self.job_config.frequency == 'manual':
            self.job_status_running = False
            self.sleep_offline()
        else:
            self.sleep_online()


    @pydio_profile
    def run(self):
        """
        Start the thread
        """
        logger = EventLogger(self.configs_path)
        very_first = False

        if self.watcher:
            if self.watcher_first_run:
                def status_callback(status):
                    logger.log_state(status, 'sync')
                self.init_global_progress()

                try:
                    self.global_progress['status_indexing'] = 1
                    logger.log_state(_('Checking changes since last launch...'), "sync")
                    very_first = True
                    self.watcher.check_from_snapshot(state_callback=status_callback)
                except DBCorruptedException as e:
                    self.stop()
                    JobsLoader.Instance().clear_job_data(self.job_config.id)
                    logging.error(e)
                    return
                except Exception as e:
                    self.interrupt = True
                    logger.log_state(_('Oops, error while indexing the local folder. Pausing the task.'), 'error')
                    logging.error(e)

                self.watcher_first_run = False
            self.watcher.start()

        while not self.interrupt:

            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                if very_first:
                    self.global_progress['status_indexing'] = 1

                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    logger.log_state(_('Status: Paused'), "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']))
                    end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        logger.log_state(_('Status: scheduled for %s') % str(start_time), "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info("Now triggering synchro as expected at time " + str(start_time))

                if not self.system.check_basepath():
                    log = _('Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry') % self.offline_timer
                    logging.error(log)
                    logger.log_state(_('Cannot find local folder, did you disconnect a volume?'), "error")
                    self.sleep_offline()
                    continue

                if self.watcher:
                    for snap_path in self.marked_for_snapshot_pathes:
                        logging.info('LOCAL SNAPSHOT : loading snapshot for directory %s' % snap_path)
                        if self.interrupt or not self.job_status_running:
                                                        raise InterruptException()
                        self.watcher.check_from_snapshot(snap_path)

                # Load local and/or remote changes, depending on the direction
                from pydio.job.change_stores import SqliteChangeStore
                self.current_store = SqliteChangeStore(self.configs_path + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'])
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info('Loading remote changes with sequence %s' % str(self.remote_seq))
                        if self.remote_seq == 0:
                            logger.log_state(_('Gathering data from remote workspace, this can take a while...'), 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except RequestException as ce:
                    if not connection_helper.is_connected_to_internet():
                        error = _('No Internet connection detected! Waiting for %s seconds to retry') % self.offline_timer
                    else:
                        error = _('Connection to server failed, server is probably down. Waiting %s seconds to retry') % self.offline_timer
                    self.marked_for_snapshot_pathes = []
                    logging.error(error)
                    logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer)
                    logging.error(error)
                    logger.log_state(_('Error while connecting to remote server (%s)') % e.message, "error")
                    self.marked_for_snapshot_pathes = []
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs()
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' + str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1
                if not connection_helper.internet_ok:
                    connection_helper.is_connected_to_internet()

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.update_min_seqs_from_store()
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 1
                logging.info('Reducing changes')
                logger.log_state(_('Merging changes between remote and local, please wait...'), 'sync')

                logging.debug('Delete Copies')
                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                logging.debug('Dedup changes')
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                if not self.storage_watcher or very_first:
                    logging.debug('Detect unnecessary changes')
                    self.current_store.detect_unnecessary_changes(local_sdk=self.system, remote_sdk=self.sdk)
                self.update_min_seqs_from_store()
                logging.debug('Clearing op and pruning folders moves')
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                logging.debug('Store conflicts')
                store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    logger.log_state(_('Conflicts detected, cannot continue!'), 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue

                changes_length = len(self.current_store)
                if not changes_length:
                    logging.info('No changes detected')
                    self.exit_loop_clean(logger)
                    very_first = False
                    continue

                self.global_progress['status_indexing'] = 0
                import change_processor
                self.global_progress['queue_length'] = changes_length
                logging.info('Processing %i changes' % changes_length)
                logger.log_state(_('Processing %i changes') % changes_length, "start")
                counter = [1]
                def processor_callback(change):
                    try:
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()
                        self.update_current_tasks()
                        self.update_global_progress()
                        Processor = StorageChangeProcessor if self.storage_watcher else ChangeProcessor
                        proc = Processor(change, self.current_store, self.job_config, self.system, self.sdk,
                                               self.db_handler, self.event_logger)
                        proc.process_change()
                        self.update_min_seqs_from_store(success=True)
                        self.global_progress['queue_done'] = float(counter[0])
                        counter[0] += 1
                        self.update_current_tasks()
                        self.update_global_progress()
                        time.sleep(0.1)
                        if self.interrupt or not self.job_status_running:
                            raise InterruptException()

                    except ProcessException as pe:
                        logging.error(pe.message)
                        return False
                    except InterruptException as i:
                        raise i
                    except PydioSdkDefaultException as p:
                        raise p
                    except Exception as ex:
                        logging.exception(ex.message)
                        return False
                    return True

                try:
                    if sys.platform.startswith('win'):
                        self.marked_for_snapshot_pathes = list(set(self.current_store.find_modified_parents()) - set(self.marked_for_snapshot_pathes))
                    self.current_store.process_changes_with_callback(processor_callback)
                except InterruptException as iexc:
                    pass
                logger.log_state(_('%i files modified') % self.global_progress['queue_done'], 'success')
                if self.global_progress['queue_done']:
                    logger.log_notif(_('%i files modified') % self.global_progress['queue_done'], 'success')

                self.exit_loop_clean(logger)

            except PydioSdkDefaultException as re:
                logging.error(re.message)
                logger.log_state(re.message, 'error')
            except SSLError as rt:
                logging.error(rt.message)
                logger.log_state(_('An SSL error happened, please check the logs'), 'error')
            except ProxyError as rt:
                logging.error(rt.message)
                logger.log_state(_('A proxy error happened, please check the logs'), 'error')
            except TooManyRedirects as rt:
                logging.error(rt.message)
                logger.log_state(_('Connection error: too many redirects'), 'error')
            except ChunkedEncodingError as rt:
                logging.error(rt.message)
                logger.log_state(_('Chunked encoding error, please check the logs'), 'error')
            except ContentDecodingError as rt:
                logging.error(rt.message)
                logger.log_state(_('Content Decoding error, please check the logs'), 'error')
            except InvalidSchema as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid schema.'), 'error')
            except InvalidURL as rt:
                logging.error(rt.message)
                logger.log_state(_('Http connection error: invalid URL.'), 'error')
            except Timeout as to:
                logging.error(to)
                logger.log_state(_('Connection timeout, will retry later.'), 'error')
            except RequestException as ree:
                logging.error(ree.message)
                logger.log_state(_('Cannot resolve domain!'), 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    logger.log_state(_('Unexpected Error: %s') % e.message, 'error')

            logging.debug('Finished this cycle, waiting for %i seconds' % self.online_timer)
            very_first = False

    @pydio_profile
    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local', success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote', success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        logging.debug('Storing sequences remote=%i local=%i', self.remote_seq, self.local_seq)
        self.update_sequences_file(self.local_seq, self.remote_seq)
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    @pydio_profile
    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message)

    @pydio_profile
    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.data_base = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.data_base, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.data_base)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=job_data_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        job_data_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.data_base + "/sequences"):
            try:
                sequences = pickle.load(open(self.data_base + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.data_base + "/sequences")


        dispatcher.connect( self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=dispatcher.Any )
        dispatcher.connect( self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=dispatcher.Any )

        if self.job_config.frequency == 'manual':
            self.job_status_running = False
class ContinuousDiffMerger(threading.Thread):
    """Main Thread grabbing changes from both sides, computing the necessary changes to apply, and applying them"""

    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.data_base = job_data_path
        self.job_config = job_config
        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl
        )
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.data_base, job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.data_base)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel='status', message='START')
        if job_config.direction != 'down':
            self.event_handler = SqlEventHandler(includes=job_config.filters['includes'],
                                                 excludes=job_config.filters['excludes'],
                                                 basepath=job_config.directory,
                                                 job_data_path=job_data_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        job_data_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(self.data_base + "/sequences"):
            try:
                sequences = pickle.load(open(self.data_base + "/sequences", "rb"))
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(self.data_base + "/sequences")


        dispatcher.connect( self.handle_transfer_rate_event, signal=TRANSFER_RATE_SIGNAL, sender=dispatcher.Any )
        dispatcher.connect( self.handle_transfer_callback_event, signal=TRANSFER_CALLBACK_SIGNAL, sender=dispatcher.Any )

        if self.job_config.frequency == 'manual':
            self.job_status_running = False


    def handle_transfer_callback_event(self, sender, change):
        self.processing_signals[change['target']] = change
        self.global_progress["queue_bytesize"] -= change['bytes_sent']
        self.global_progress["queue_done"] += float(change['bytes_sent']) / float(change["total_size"])

    def handle_transfer_rate_event(self, sender, transfer_rate):
        """
        Handler for TRANSFER_SIGNAL to update the transfer rate internally. It's averaged with previous value.
        :param sender:Any
        :param transfer_rate:float
        :return:
        """
        if self.global_progress['last_transfer_rate'] > 0:
            self.global_progress['last_transfer_rate'] = (float(transfer_rate) + self.global_progress['last_transfer_rate']) / 2.0
        else:
            self.global_progress['last_transfer_rate'] = float(transfer_rate)

    def is_running(self):
        """
        Whether the job is in Running state or not.
        :return:bool
        """
        return self.job_status_running

    def init_global_progress(self):
        """
        Initialize the internal progress data
        :return:None
        """
        self.global_progress = {
            'queue_length'      :0,
            'queue_done'        :0.0,
            'queue_bytesize'    :0,
            'last_transfer_rate':-1,
            'queue_start_time'  :time.clock(),
            'total_time'        :0
        }

    def update_global_progress(self):
        """
        Compute a dict representation with many indications about the current state of the queue
        :return: dict
        """
        self.global_progress['total_time'] = time.clock() - self.global_progress['queue_start_time']
        self.global_progress["queue_bytesize"] = self.compute_queue_bytesize()
        # compute an eta
        eta = -1
        if self.global_progress['last_transfer_rate'] > -1 and self.global_progress['queue_bytesize'] > 0 :
            eta = self.global_progress['queue_bytesize'] / self.global_progress['last_transfer_rate']
        elif self.global_progress['queue_done']:
            remaining_operations = self.global_progress['queue_length'] - self.global_progress['queue_done']
            eta = remaining_operations * self.global_progress['total_time'] / self.global_progress['queue_done']

        self.global_progress['eta'] = eta

        #logging.info(self.global_progress)
        return self.global_progress

    def get_global_progress(self):
        return self.global_progress

    def update_current_tasks(self, cursor=0, limit=5):
        """
        Get a list of the current tasks
        :return: list()
        """
        if not hasattr(self, 'current_store'):
            return []
        self.current_tasks = self.current_store.list_changes(cursor, limit)
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]

    def get_current_tasks(self):
        for change in self.current_tasks:
            if change['target'] in self.processing_signals:
                progress = self.processing_signals[change['target']]
                for key in progress.keys():
                    change[key] = progress[key]
        return {
            'total': self.global_progress['queue_length'],
            'current': self.current_tasks
        }

    def compute_queue_bytesize(self):
        """
        Sum all the bytesize of the nodes that are planned to be uploaded/downloaded in the queue.
        :return:float
        """
        if not hasattr(self, 'current_store'):
            return 0
        total = 0
        exclude_pathes = []
        for task in self.processing_signals:
            if 'remaining_bytes' in task:
                total += float(task['remaining_bytes'])
                exclude_pathes.append('"' + task['target'] + '"')
        where = ''
        if len(exclude_pathes):
            where = "target IN (" + ','.join(exclude_pathes) + ")"
            return self.current_store.sum_sizes(where)
        else:
            return self.current_store.sum_sizes()

    def start_now(self):
        """
        Resume task (set it in running mode) and make sure the cycle starts now
        :return:
        """
        self.last_run = 0
        self.resume()

    def pause(self):
        """
        Set the task in pause. The thread is still running, but the cycle does nothing.
        :return:None
        """
        self.job_status_running = False
        self.info('Job Paused', toUser='******', channel='status')

    def resume(self):
        """
        Set the task out of pause mode.
        :return:
        """
        self.job_status_running = True
        self.info('Job Started', toUser='******', channel='status')

    def stop(self):
        """
        Set the thread in "interrupt" mode : will try to stop cleanly, and then the thread will stop.
        :return:
        """
        if hasattr(self, 'watcher'):
            logging.debug("Stopping watcher: %s" % self.watcher)
            self.watcher.stop()
        self.info('Job stopping', toUser='******', channel='status')
        self.interrupt = True

    def sleep_offline(self):
        """
        Sleep the thread for a "long" time (offline time)
        :return:
        """
        self.online_status = False
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def sleep_online(self):
        """
        Sleep the thread for a "short" time (online time)
        :return:
        """
        self.online_status = True
        self.last_run = time.time()
        time.sleep(self.event_timer)

    def run(self):
        """
        Start the thread
        """
        logger = EventLogger(self.data_base)
        very_first = False

        if self.watcher:
            if self.watcher_first_run:
                logger.log_state('Checking changes since last launch...', "sync")
                very_first = True
                self.watcher.check_from_snapshot()
                self.watcher_first_run = False
            self.watcher.start()

        while not self.interrupt:

            try:
                # logging.info('Starting cycle with cycles local %i and remote %is' % (self.local_seq, self.remote_seq))
                self.processing_signals = {}
                self.init_global_progress()
                interval = int(time.time() - self.last_run)
                if (self.online_status and interval < self.online_timer) or (not self.online_status and interval < self.offline_timer):
                    time.sleep(self.event_timer)
                    continue

                if not self.job_status_running:
                    logging.debug("self.online_timer: %s" % self.online_timer)
                    logger.log_state('Status: Paused', "sync")
                    self.sleep_offline()
                    continue

                if self.job_config.frequency == 'time':
                    start_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']))
                    end_time = datetime.time(int(self.job_config.start_time['h']), int(self.job_config.start_time['m']), 59)
                    now = datetime.datetime.now().time()
                    if not start_time < now < end_time:
                        logger.log_state('Status: scheduled for %s' % str(start_time), "sync")
                        self.sleep_offline()
                        continue
                    else:
                        logging.info("Now triggering synchro as expected at time " + start_time)

                if not self.system.check_basepath():
                    log = 'Cannot find local folder! Did you disconnect a volume? Waiting %s seconds before retry' % self.offline_timer
                    logging.error(log)
                    logger.log_state('Cannot find local folder, did you disconnect a volume?', "error")
                    self.sleep_offline()
                    continue

                # Load local and/or remote changes, depending on the direction
                from pydio.job.change_stores import SqliteChangeStore
                self.current_store = SqliteChangeStore(self.data_base + '/changes.sqlite', self.job_config.filters['includes'], self.job_config.filters['excludes'])
                self.current_store.open()
                try:
                    if self.job_config.direction != 'up':
                        logging.info('Loading remote changes with sequence ' + str(self.remote_seq))
                        if self.remote_seq == 0:
                            logger.log_state('Gathering data from remote workspace, this can take a while...', 'sync')
                            very_first = True
                        self.remote_target_seq = self.load_remote_changes_in_store(self.remote_seq, self.current_store)
                        self.current_store.sync()
                    else:
                        self.remote_target_seq = 1
                        self.ping_remote()
                except ConnectionError as ce:
                    error = 'No connection detected, waiting %s seconds to retry' % self.offline_timer
                    logging.error(error)
                    logger.log_state(error, "wait")
                    self.sleep_offline()
                    continue
                except Exception as e:
                    error = 'Error while connecting to remote server (%s), waiting for %i seconds before retempting ' % (e.message, self.offline_timer)
                    logging.error(error)
                    logger.log_state('Error while connecting to remote server (%s)' % e.message, "error")
                    self.sleep_offline()
                    continue
                self.online_status = True
                if not self.job_config.server_configs:
                    self.job_config.server_configs = self.sdk.load_server_configs()
                self.sdk.set_server_configs(self.job_config.server_configs)

                if self.job_config.direction != 'down':
                    logging.info('Loading local changes with sequence ' + str(self.local_seq))
                    self.local_target_seq = self.db_handler.get_local_changes_as_stream(self.local_seq, self.current_store.flatten_and_store)
                    self.current_store.sync()
                else:
                    self.local_target_seq = 1

                logging.info('Reducing changes')

                self.current_store.delete_copies()
                self.update_min_seqs_from_store()
                self.current_store.dedup_changes()
                self.update_min_seqs_from_store()
                self.current_store.detect_unnecessary_changes(local_sdk=self.system, remote_sdk=self.sdk)
                self.update_min_seqs_from_store()
                #self.current_store.filter_out_echoes_events()
                #self.update_min_seqs_from_store()
                self.current_store.clear_operations_buffer()
                self.current_store.prune_folders_moves()
                self.update_min_seqs_from_store()

                store_conflicts = self.current_store.clean_and_detect_conflicts(self.db_handler)
                if store_conflicts:
                    logging.info('Conflicts detected, cannot continue!')
                    logger.log_state('Conflicts detected, cannot continue!', 'error')
                    self.current_store.close()
                    self.sleep_offline()
                    continue

                changes_length = len(self.current_store)
                if changes_length:
                    import change_processor
                    self.global_progress['queue_length'] = changes_length
                    logging.info('Processing %i changes' % changes_length)
                    logger.log_state('Processing %i changes' % changes_length, "start")
                    counter = [1]
                    def processor_callback(change):
                        try:
                            if self.interrupt or not self.job_status_running:
                                raise InterruptException()
                            self.update_current_tasks()
                            self.update_global_progress()
                            proc = ChangeProcessor(change, self.current_store, self.job_config, self.system, self.sdk,
                                                   self.db_handler, self.event_logger)
                            proc.process_change()
                            self.update_min_seqs_from_store(success=True)
                            self.global_progress['queue_done'] = float(counter[0])
                            counter[0] += 1
                            self.update_current_tasks()
                            self.update_global_progress()
                            time.sleep(0.1)
                            if self.interrupt or not self.job_status_running:
                                raise InterruptException()

                        except ProcessException as pe:
                            logging.error(pe.message)
                            return False
                        except InterruptException as i:
                            raise i
                        except PydioSdkDefaultException as p:
                            raise p
                        except Exception as ex:
                            logging.exception(ex.message)
                            return False
                        return True

                    try:
                        self.current_store.process_changes_with_callback(processor_callback)
                    except InterruptException as iexc:
                        pass
                    logger.log_state('%i files modified' % self.global_progress['queue_done'], "success")
                else:
                    logging.info('No changes detected')
                    if very_first:
                        logger.log_state('Remote and local are synchronized', 'success')

            except PydioSdkDefaultException as re:
                logging.warning(re.message)
                logger.log_state(re.message, 'error')
            except Exception as e:
                if not (e.message.lower().count('[quota limit reached]') or e.message.lower().count('[file permissions]')):
                    logging.exception('Unexpected Error: %s' % e.message)
                    logger.log_state('Unexpected Error: %s' % e.message, 'error')


            logging.info('Finished this cycle, waiting for %i seconds' % self.online_timer)
            self.current_store.close()
            self.init_global_progress()
            if self.job_config.frequency == 'manual':
                self.job_status_running = False
                self.sleep_offline()
            else:
                self.sleep_online()

    def update_min_seqs_from_store(self, success=False):
        self.local_seq = self.current_store.get_min_seq('local', success=success)
        if self.local_seq == -1:
            self.local_seq = self.local_target_seq
        self.remote_seq = self.current_store.get_min_seq('remote', success=success)
        if self.remote_seq == -1:
            self.remote_seq = self.remote_target_seq
        logging.debug('Storing sequences remote %i local %i', self.local_seq, self.remote_seq)
        pickle.dump(dict(
            local=self.local_seq,
            remote=self.remote_seq
        ), open(self.data_base + '/sequences', 'wb'))
        if self.event_handler:
            self.event_handler.last_seq_id = self.local_seq

    def ping_remote(self):
        """
        Simple stat of the remote server root, to know if it's reachable.
        :return:bool
        """
        test = self.sdk.stat('/')
        return (test != False)

    def info(self, message, toUser=False, channel='sync'):
        logging.info(message)
        if toUser:
            dispatcher.send(signal=PUBLISH_SIGNAL, sender=self, channel=channel, message=message)

    def load_remote_changes_in_store(self, seq_id, store):
        last_seq = self.sdk.changes_stream(seq_id, store.flatten_and_store)
        return last_seq
Beispiel #9
0
    def __init__(self, job_config, job_data_path):
        """
        Initialize thread internals
        :param job_config: JobConfig instance
        :param job_data_path: Filesystem path where the job data are stored
        :return:
        """
        threading.Thread.__init__(self)
        self.last_run = 0
        self.configs_path = job_data_path
        self.job_config = job_config
        sqlite_files = [
            file for file in os.listdir(self.configs_path)
            if file.endswith(".sqlite")
        ]

        from pydio.utils.check_sqlite import check_sqlite_file
        for sqlite_file in sqlite_files:
            try:
                exists_and_correct = check_sqlite_file(
                    os.path.join(self.configs_path, sqlite_file))
                if exists_and_correct:
                    logging.info(
                        "Structure and Integrity of SQLite file %s is intact "
                        % str(os.path.join(self.configs_path, sqlite_file)))
            except DBCorruptedException as e:
                logging.debug(
                    "SQLite file %s is corrupted (Reason: %s), Deleting file and Reinitialising sync"
                    % (str(os.path.join(self.configs_path,
                                        sqlite_file)), e.message))
                os.unlink(os.path.join(self.configs_path, sqlite_file))
                self.update_sequences_file(0, 0)

        self.init_global_progress()

        self.basepath = job_config.directory
        self.ws_id = job_config.workspace
        self.sdk = PydioSdk(
            job_config.server,
            ws_id=self.ws_id,
            remote_folder=job_config.remote_folder,
            user_id=job_config.user_id,
            device_id=ConfigManager.Instance().get_device_id(),
            skip_ssl_verify=job_config.trust_ssl,
            proxies=ConfigManager.Instance().get_defined_proxies(),
            timeout=job_config.timeout)
        self.system = SystemSdk(job_config.directory)
        self.remote_seq = 0
        self.local_seq = 0
        self.local_target_seq = 0
        self.remote_target_seq = 0
        self.local_seqs = []
        self.remote_seqs = []
        self.db_handler = LocalDbHandler(self.configs_path,
                                         job_config.directory)
        self.interrupt = False
        self.event_timer = 2
        self.online_timer = 10
        self.offline_timer = 60
        self.online_status = True
        self.job_status_running = True
        self.direction = job_config.direction
        self.event_logger = EventLogger(self.configs_path)
        self.processing_signals = {}
        self.current_tasks = []
        self.event_handler = None
        self.watcher = None
        self.watcher_first_run = True
        # TODO: TO BE LOADED FROM CONFIG
        self.storage_watcher = job_config.label.startswith('LSYNC')

        self.marked_for_snapshot_pathes = []

        dispatcher.send(signal=PUBLISH_SIGNAL,
                        sender=self,
                        channel='status',
                        message='START')
        if job_config.direction != 'down' or (
                self.job_config.direction == 'down'
                and self.job_config.solve != 'remote'):
            self.event_handler = SqlEventHandler(
                includes=job_config.filters['includes'],
                excludes=job_config.filters['excludes'],
                basepath=job_config.directory,
                job_data_path=self.configs_path)
            self.watcher = LocalWatcher(job_config.directory,
                                        self.configs_path,
                                        event_handler=self.event_handler)
            self.db_handler.check_lock_on_event_handler(self.event_handler)

        if os.path.exists(os.path.join(self.configs_path, "sequences")):
            try:
                with open(os.path.join(self.configs_path, "sequences"),
                          "rb") as f:
                    sequences = pickle.load(f)
                self.remote_seq = sequences['remote']
                self.local_seq = sequences['local']
                if self.event_handler:
                    self.event_handler.last_seq_id = self.local_seq

            except Exception:
                # Wrong content, remove sequences file.
                os.unlink(os.path.join(self.configs_path, "sequences"))

        dispatcher.connect(self.handle_transfer_rate_event,
                           signal=TRANSFER_RATE_SIGNAL,
                           sender=self.sdk)
        dispatcher.connect(self.handle_transfer_callback_event,
                           signal=TRANSFER_CALLBACK_SIGNAL,
                           sender=self.sdk)

        if self.job_config.frequency == 'manual':
            self.job_status_running = False