Example 1
def lock_domain(domain, lock_timeout=300, wait=True):
    """
    Implementation of lock/try_lock; this pattern is very common,
    so it lives in its own helper function.
    """
    global GLOBAL_LOCK_TABLE

    if not os.path.exists(paths.get_domain_path(domain)):
        raise ProtocolError('Domain does not exist.')

    if domain in GLOBAL_LOCK_TABLE:
        # We do not want to wait, so raise a ProtocolError immediately
        if GLOBAL_LOCK_TABLE[domain].is_locked and wait is False:
            raise ProtocolError('Already locked.')

    try:
        # Create a new lock entry for this domain if none exists yet
        if domain not in GLOBAL_LOCK_TABLE:
            GLOBAL_LOCK_TABLE[domain] = lock.FileLock(
                    file_name=domain,
                    folder=paths.get_content_root(),
                    timeout=lock_timeout)

        # Lock, or wait for lock.
        GLOBAL_LOCK_TABLE[domain].acquire()

    # Convert other exceptions to a ProtocolError
    except lock.FileLockException as err:
        raise ProtocolError(str(err))
    except OSError as err:
        del GLOBAL_LOCK_TABLE[domain]
        raise ProtocolError(str(err))
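
lock.FileLock, paths, and ProtocolError are classes and modules from the surrounding project. Purely as an illustration of the lock-file technique these examples rely on, a minimal self-contained sketch using only the standard library could look roughly like this (SimpleFileLock and its behaviour are assumptions, not the project's actual API):

import os
import time


class SimpleFileLock:
    """Minimal lock-file sketch: atomically create <name>.lock, or wait for it."""

    def __init__(self, file_name, folder, timeout=300):
        self.lock_path = os.path.join(folder, file_name + '.lock')
        self.timeout = timeout
        self._fd = None

    def acquire(self, poll_interval=0.5):
        deadline = time.time() + self.timeout
        while True:
            try:
                # O_CREAT | O_EXCL fails if the lock file already exists
                self._fd = os.open(self.lock_path,
                                   os.O_CREAT | os.O_EXCL | os.O_WRONLY)
                return
            except FileExistsError:
                if time.time() > deadline:
                    raise TimeoutError('Could not acquire ' + self.lock_path)
                time.sleep(poll_interval)

    def release(self):
        if self._fd is not None:
            os.close(self._fd)
            os.remove(self.lock_path)
            self._fd = None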
Example 2
    def start_sync(self):
        """
        Starts the rsync procedure (rsync submodule) to mirror the temporary
        source to the destination -> content archive path
        """
        content_path = paths.get_content_root()
        itemlist = os.listdir(self.__path)
        for domain in itemlist:
            domain_path = paths.get_domain_path(domain)
            fsmutex = lock.FileLock(domain, folder=content_path, timeout=100)
            fsmutex.acquire()

            try:
                logging.debug('Creating directory: ' + domain_path)
                os.mkdir(domain_path)
            except OSError:
                # The domain directory may already exist, which is expected here
                pass

            git_proc = git.Git(domain)
            git_proc.init()
            git_proc.checkout('empty')
            git_proc.branch(self.__metalist[0]['commitTime'])

            rsync(os.path.join(self.__path, domain), content_path)

            git_proc.commit('Site {domain_name} was crawled.'
                            .format(domain_name=domain))
            git_proc.recreate_master()
            fsmutex.release()
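
The git.Git wrapper and the rsync() helper come from the project's own submodules and are not shown. A minimal sketch of what the rsync step might boil down to, assuming the standard rsync binary is available (the helper's signature and flags are assumptions, not the project's actual implementation):

import subprocess


def rsync(source, destination):
    """Mirror the contents of `source` into `destination` via the rsync binary."""
    # -a preserves permissions and timestamps; the trailing slash on the
    # source makes rsync copy the directory contents, not the directory itself.
    subprocess.check_call(['rsync', '-a', source.rstrip('/') + '/', destination])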
Example 3
def clear_locks():
    """
    Searches for *.lock files and tries to remove the filesystem locks it finds
    """
    cmd = 'find {path} -iname "*.lock"'.format(path=paths.get_content_root())
    try:
        locklist = str(subprocess.check_output(cmd, shell=True), 'UTF-8').splitlines()

        if len(locklist) > 0:
            logging.info('Will remove the following files:')
            for lockfile in locklist:
                logging.info('rm -f ' + lockfile)
                os.remove(lockfile)
    except subprocess.CalledProcessError:
        logging.exception('Cannot clear locks')
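
As a design note, the same cleanup can be done without shelling out to find by using glob with a recursive pattern (Python 3.5+). A sketch assuming the same content-root layout; unlike find -iname, this match is case-sensitive:

import glob
import logging
import os


def clear_locks_glob(content_root):
    """Remove leftover *.lock files below content_root without a shell call."""
    pattern = os.path.join(content_root, '**', '*.lock')
    for lockfile in glob.glob(pattern, recursive=True):
        logging.info('rm -f ' + lockfile)
        os.remove(lockfile)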
Example 4
def repair():
    """
    Walks through the domain hierarchy invoking sanitize_domain() and clear_locks()
    """
    try:
        # Make sure all repositories are on the most recent revision
        # Additional error checking might take place here
        domain_patt = os.path.join(paths.get_content_root(), '*')
        for domain in glob.glob(domain_patt):
            logging.info('- Sanitizing: ' + domain)
            sanitize_domain(domain)

        # Remove all .lock files that are left by (very) rough shutdowns
        clear_locks()
    except OSError:
        logging.exception('Unable to repair archive')
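
sanitize_domain() is defined elsewhere in the project and does not appear in these examples. Purely so the snippet can be read in isolation, a hypothetical stub might just verify that the per-domain git repository is present (this is an assumption for illustration, not the project's actual repair logic):

import logging
import os


def sanitize_domain(domain_path):
    """Hypothetical placeholder: check that the domain directory holds a git repo."""
    if not os.path.isdir(os.path.join(domain_path, '.git')):
        logging.warning('No git repository found in ' + domain_path)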
Example 5
    def load(self):
        """
        Invokes threaded XML recovery
        """
        try:
            self.__init__()
            domain_patt = os.path.join(paths.get_content_root(), '*')
            domain_list = glob.glob(domain_patt)
            threadPool = ThreadPool(16)
            threadPool.map(self.recover_domain, domain_list)
            threadPool.close()
            threadPool.join()
        except KeyboardInterrupt:
            print('Got interrupted')
        finally:
            self.__shutdown = True
            threadPool.close()
            threadPool.join()

        return self.__metalist
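
ThreadPool here is multiprocessing.pool.ThreadPool. The map/close/join pattern can be sketched on its own as below; creating the pool before the try block ensures the finally clause never references an undefined name, which could happen in load() if glob.glob() raised before ThreadPool(16) ran. recover_domain is stubbed and the domain list is made up:

from multiprocessing.pool import ThreadPool


def recover_domain(domain):
    """Stub standing in for the project's per-domain XML recovery."""
    return domain


domain_list = ['example.org', 'example.com']  # made-up domains
pool = ThreadPool(16)
try:
    results = pool.map(recover_domain, domain_list)
finally:
    # Always shut the pool down, even after a KeyboardInterrupt
    pool.close()
    pool.join()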