Example #1
0
def janitor():
    """Reconcile the on-disk data cache with the 'data:*' keys in redis.

    Ideally this is run regularly (e.g. by a systemd service) to cleanup
    redis and the file system while Fractalis is running. Two passes are
    made:

    1. Every regular file in ``<FRACTALIS_TMP_DIR>/data`` whose name is not
       a tracked task id is removed via ``sync.remove_file``.
    2. Every tracked task id whose celery state is 'SUCCESS' but which has
       no file in the data directory gets its 'data:<task_id>' key deleted.

    A missing data directory is treated like an empty one, so in that case
    only the second pass has any effect.
    """
    data_dir = os.path.join(app.config['FRACTALIS_TMP_DIR'], 'data')

    # Split on the first ':' only, so an id containing ':' is kept whole.
    # NOTE(review): assumes the redis client yields str keys (not bytes)
    # -- confirm against the client configuration.
    tracked_ids = [key.split(':', 1)[1] for key in redis.scan_iter('data:*')]
    # Set copy for O(1) membership tests; the list keeps the scan order.
    known_ids = set(tracked_ids)

    # EAFP: the directory may not exist (or may vanish between a check and
    # the listing), which simply means there are no cached files to clean.
    try:
        dir_entries = os.listdir(data_dir)
    except FileNotFoundError:
        dir_entries = []

    # clean cached files that redis no longer tracks
    for entry in dir_entries:
        entry_path = os.path.join(data_dir, entry)
        if entry not in known_ids and os.path.isfile(entry_path):
            sync.remove_file(entry_path)

    # clean tracked ids whose file is gone
    for task_id in tracked_ids:
        # Cheap filesystem check first; the task state is only queried for
        # ids that are actually candidates for removal.
        if os.path.exists(os.path.join(data_dir, task_id)):
            continue
        if celery.AsyncResult(task_id).state == 'SUCCESS':
            redis.delete('data:{}'.format(task_id))
Example #2
0
 def remove_duplicates(self, data_tasks: List[str],
                       descriptor: dict) -> None:
     """Drop the redis entries of all duplicates of the given descriptor.

     Duplicates are looked up with ``find_duplicates``. When at least one
     was found, ``janitor.delay()`` is called afterwards so that files left
     without a redis entry get cleaned up.
     :param data_tasks: Passed to ``find_duplicates`` to limit the search.
     :param descriptor: ETL descriptor. Used to identify duplicates.
     """
     duplicates = self.find_duplicates(data_tasks, descriptor)
     for duplicate_id in duplicates:
         redis_key = 'data:{}'.format(duplicate_id)
         redis.delete(redis_key)
     # Nothing was removed, so there is nothing for the janitor to do.
     if not duplicates:
         return
     janitor.delay()
Example #3
0
def remove_data(task_id: str) -> None:
    """Erase the data state stored for the given task id.

    The task is revoked, its 'data:<task_id>' entry is deleted from redis
    and, if such an entry existed, the file referenced by its 'file_path'
    field is removed. Without an entry only a warning is logged.
    :param task_id: The id associated with a data state
    """
    redis_key = 'data:{}'.format(task_id)
    # Read the entry before deleting it; it holds the path of the file.
    stored_state = redis.get(redis_key)
    celery.control.revoke(task_id, terminate=True, signal='SIGUSR1')
    redis.delete(redis_key)
    if not stored_state:
        logger.warning("Can't delete file for task id '{}',because there is "
                       "no associated entry in Redis.".format(task_id))
        return
    file_path = json.loads(stored_state)['file_path']
    remove_file(file_path)