def lock_domain(domain, lock_timeout=300, wait=True):
    """
    Acquire the file lock of a domain (shared implementation of
    lock/try_lock).

    Lock objects are cached in GLOBAL_LOCK_TABLE under the domain name and
    are created on first use.

    :domain: Name of the domain to lock; its domain path must exist.
    :lock_timeout: Timeout handed to lock.FileLock when a new lock object
                   has to be created for this domain.
    :wait: If False and the cached lock is currently held, fail right away
           instead of waiting for the lock.
    :raises ProtocolError: If the domain does not exist, if the lock is
                           already held and wait is False, or if acquiring
                           fails with a FileLockException or an OSError.
    """
    if not os.path.exists(paths.get_domain_path(domain)):
        raise ProtocolError('Domain does not exist.')

    if domain in GLOBAL_LOCK_TABLE:
        # The caller does not want to wait, so refuse immediately.
        if GLOBAL_LOCK_TABLE[domain].is_locked and wait is False:
            raise ProtocolError('Already locked.')

    try:
        # Create the lock object lazily on first use of this domain.
        if domain not in GLOBAL_LOCK_TABLE:
            GLOBAL_LOCK_TABLE[domain] = lock.FileLock(
                file_name=domain,
                folder=paths.get_content_root(),
                timeout=lock_timeout)

        # Lock, or wait for the lock.
        GLOBAL_LOCK_TABLE[domain].acquire()
    # Convert lower-level failures to a ProtocolError.
    except lock.FileLockException as err:
        raise ProtocolError(str(err)) from err
    except OSError as err:
        # The OSError may have been raised before the entry was stored
        # (e.g. while constructing the FileLock); a plain `del` would then
        # raise KeyError and hide the real error from the caller.
        GLOBAL_LOCK_TABLE.pop(domain, None)
        raise ProtocolError(str(err)) from err
def checkout_handler(args):
    """
    Checkout a certain branch (usually a commitTag or master)

    checkout [domain] {branch_name}

    * domain is e.g. www.heise.de
    * branch_name the entity to checkout, if omitted only the path
      is returned and no git work is done
    * Returns: The path to the checked-out domain, terminated by a newline
    * Raises: ProtocolError if the domain path does not exist or if the
      git checkout reports a non-zero return code

    Note: You should always checkout master when you're done!
    """
    domain = args[0]
    domain_path = paths.get_domain_path(domain)
    if not os.path.exists(domain_path):
        raise ProtocolError('Invalid Domain.')

    # The branch name is optional; without it only the path is reported.
    try:
        branch = args[1]
    except IndexError:
        branch = None

    if branch is not None:
        wrapper = Git(domain)
        rcode = wrapper.checkout(branch)
        # Compare by value: `is not 0` tests object identity, which only
        # works by accident of CPython's small-int cache and triggers a
        # SyntaxWarning on Python 3.8+.
        if rcode != 0:
            raise ProtocolError('checkout returned {rc}'.format(rc=rcode))

    return domain_path + '\n'
def start_sync(self):
    """
    Starts rsync procedure (rsync submodule)
    to mirror temporary source to destination -> content archive path

    For every entry found in the temporary source directory the matching
    domain is locked, its directory and git repository are prepared, the
    data is rsync'd into the content root and the result is committed.
    The lock of a domain is released even if one of these steps fails.
    """
    content_path = paths.get_content_root()

    for domain in os.listdir(self.__path):
        domain_path = paths.get_domain_path(domain)
        fsmutex = lock.FileLock(domain, folder=content_path, timeout=100)
        fsmutex.acquire()

        # Everything below runs while holding the lock; the finally clause
        # guarantees the release, so a failing git/rsync step cannot leave
        # the domain locked.
        try:
            try:
                logging.debug('Creating directory: ' + domain_path)
                os.mkdir(domain_path)
            except OSError:
                # Deliberately ignored: a failing mkdir is expected here
                # (presumably because the directory already exists).
                pass

            git_proc = git.Git(domain)
            git_proc.init()
            git_proc.checkout('empty')
            git_proc.branch(self.__metalist[0]['commitTime'])

            rsync(os.path.join(self.__path, domain), content_path)

            git_proc.commit('Site {domain_name} was crawled.'
                            .format(domain_name=domain))
            git_proc.recreate_master()
        finally:
            fsmutex.release()
def test_commit(self):
    # Exercises the commit command: a commit without changes, a commit
    # with a changed file, and the growth of the commit list by one entry.
    # master is checked out again at the end, even if an assertion fails.

    # Test if commit fails without committable data
    self.assertTrue(checkAck(self.commit(TESTDOMAIN)))

    # checkout empty testbranch (doesn't harm)
    self.checkout(TESTDOMAIN, 'empty')

    try:
        # Get a commitlist, and check if valid
        commit_list_before = self.dialog('list_commits ' + TESTDOMAIN)
        self.assertFalse(checkAck(commit_list_before))

        # make some committable changes
        testfile_path = os.path.join(
            paths.get_domain_path(TESTDOMAIN), 'testfile')
        with open(testfile_path, 'a') as test_file:
            test_file.write('hello world\n')

        # check if commit works
        self.assertTrue(checkOk(self.commit(TESTDOMAIN)))

        # after the first commit, another commit shouldn't work anymore
        self.assertTrue(checkAck(self.commit(TESTDOMAIN)))

        # get another list of commits, and check for growth
        commit_list_after = self.dialog('list_commits ' + TESTDOMAIN)
        self.assertFalse(checkAck(commit_list_after))
        # assertEqual reports both line counts when the check fails.
        self.assertEqual(len(commit_list_after.splitlines()),
                         len(commit_list_before.splitlines()) + 1)
    finally:
        # Checkout master again, so a failing assertion above does not
        # leave the test domain on the 'empty' branch.
        self.checkout(TESTDOMAIN, 'master')
def __init__(self, domain=None, abs_path=None):
    """Set up a Git wrapper bound to one repository.

    :domain: A domain found in the archive; resolved to its path via
             paths.get_domain_path(). Takes precedence over abs_path.
    :abs_path: Absolute path to the repository, used only when no domain
               is given.
    :raises ValueError: If neither domain nor abs_path is passed.
    """
    if domain is None and abs_path is None:
        raise ValueError('Neither domain, nor abs_path passed')

    if domain is not None:
        repo_root = paths.get_domain_path(domain)
    else:
        repo_root = abs_path

    self.__domain = repo_root
    self.__gitdir = os.path.join(repo_root, '.git')
    self.__empty = os.path.join(repo_root, 'empty_file')

    # Patterns used for error checking of branch names and commit hashes.
    self.__branch_pattern = re.compile(
        '[0-9]{4}(H[0-9]{2}){2}T[0-9]{2}(C[0-9]{2}){2}$')
    self.__commit_pattern = re.compile('[0-9a-z]{40}$')

    # Common prefix of every git invocation (note the trailing space).
    command_template = 'git --git-dir {gitdir} --work-tree {gitcwd} '
    self.__basecmd = command_template.format(
        gitdir=self.__gitdir,
        gitcwd=repo_root)
def checkout(self, domain, branch):
    """Send a checkout command for domain/branch and verify the reply.

    The reply of self.dialog() must equal the UTF-8 encoded domain path
    followed by a newline and an 'OK' line; otherwise the assertion fails.
    """
    expected_reply = bytes(paths.get_domain_path(domain), 'UTF-8') + b'\nOK\n'
    command = ' '.join(('checkout', domain, branch))
    self.assertEqual(expected_reply, self.dialog(command))