def lambda_handler(event, context):
    """Commit the contents of an uploaded S3 zip archive and push it to CodeCommit.

    The first record of the S3 event is downloaded to ``/tmp/aws.zip``,
    extracted into a freshly initialised repository at ``/tmp/css_download``,
    committed and pushed to the ``master`` branch of the remote.

    :param event: S3 notification event; only ``event['Records'][0]`` is used.
    :param context: Lambda context object (unused).
    :raises ValueError: if the event carries an empty record list.
    """
    import shutil
    from urllib.parse import unquote_plus

    repo_dir = '/tmp/css_download'
    archive_path = '/tmp/aws.zip'

    print('local repo creation started')
    # /tmp may survive between invocations when the execution environment is
    # reused; Repo.init(mkdir=True) would then fail on the existing directory,
    # so always start from a clean checkout location.
    if os.path.isdir(repo_dir):
        shutil.rmtree(repo_dir)
    local_repo = Repo.init(repo_dir, mkdir=True)
    print('local repo creation successful')
    s3 = boto3.resource('s3')
    print('local repo creation ended')

    records = event['Records']
    if not records:
        raise ValueError("event contains no S3 records")
    # Only the first record is handled, exactly as before.
    record = records[0]
    bucket = record['s3']['bucket']['name']
    # Object keys arrive URL-encoded in S3 notifications (e.g. ' ' as '+').
    key = unquote_plus(record['s3']['object']['key'])
    print(key)

    s3.Bucket(bucket).download_file(key, archive_path)
    print(os.listdir('/tmp/'))

    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(repo_dir)

    # Only the top-level directory entries are listed and handed to stage().
    entries = os.listdir('/tmp/css_download/')
    print(entries)

    print('Stage started')
    local_repo.stage(entries)
    print('Commit started')
    local_repo.do_commit(b"new commit",
                         committer=b"sandeep <*****@*****.**>")
    print('Push started')
    # SECURITY: the fallback URL embeds Git credentials in source control.
    # Rotate them and supply the URL through CODECOMMIT_PUSH_URL (or a secrets
    # store); the literal is kept only so existing deployments keep working.
    remote_url = os.environ.get(
        'CODECOMMIT_PUSH_URL',
        "https://sandeep.s.k-at-574112450463:N2YDDTf+71bXZUNZjiF6YKFDGYXgPsIhI1GxbIVm+Wg=@git-codecommit.us-east-2.amazonaws.com/v1/repos/css-repo")
    porcelain.push(repo_dir, remote_url, "master")
    print('Push successful')
def dulwichCommit(self, filePath, fullPath, kind):
    """Stage ``filePath`` in the autogit repository and commit if needed.

    A commit is always made when the repository has no HEAD yet; otherwise
    one is made only when the index differs from the HEAD tree.

    :param filePath: path to stage (converted with ``str``).
    :param fullPath: unused.
    :param kind: label interpolated into the commit message.
    :return: the value returned by ``do_commit``, or ``None`` when there was
        nothing to commit.
    """
    repo = Repo(AUTOGIT_PATH)
    repo.stage(map(str, [filePath]))
    index = repo.open_index()

    try:
        identity = repo._get_user_identity()
    except ValueError:
        identity = "autogit"

    def make_commit():
        return repo.do_commit('%s - autogit commit (via dulwich)' % kind,
                              committer=identity)

    # A missing HEAD means this is the very first commit.
    try:
        repo.head()
    except KeyError:
        return make_commit()

    pending = list(tree_changes(repo,
                                index.commit(repo.object_store),
                                repo['HEAD'].tree))
    if not pending:
        return None
    return make_commit()
def GitSave(self, vguuid, commentStr):
    """Download the target's config files and commit them to the git repository.

    Fetches ``/temp/scst.conf`` and ``/temp/<vguuid>`` through ``self.srv``
    into ``self.iscsiconfdir`` and then stages and commits every regular file
    in that directory. Failures are logged, never raised.

    :param vguuid: volume-group identifier; names the remote file and the
        local ``.lvm`` copy.
    :param commentStr: commit message, also used in log output.
    :return: None
    """
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        self.srv.get('/temp/scst.conf',
                     self.iscsiconfdir + self.serverDNS + '.scst.conf')
        self.srv.get('/temp/' + vguuid,
                     self.iscsiconfdir + self.serverDNS + '.' + vguuid + '.lvm')
    except Exception:
        var = format_exc()
        logger.error("During GitSave: %s: PYSFTP download error: %s" %
                     (commentStr, var))
        return

    try:
        repo = Repo(self.iscsiconfdir)
        filelist = [
            f for f in listdir(self.iscsiconfdir)
            if isfile(join(self.iscsiconfdir, f))
        ]
        repo.stage(filelist)
        repo.do_commit(commentStr)
    except Exception:
        var = format_exc()
        logger.error("During GitSave: %s: Git save error: %s" %
                     (commentStr, var))
class Git():
    """ wrapper around the git repository that versions the data files """

    def __init__(self):
        self.repo_path = user_data_dir(appname, appauthor)
        self.files_under_version_controll = ['config.json', 'data.json']
        # open the repository, creating it on first use
        try:
            self.repo = Repo(self.repo_path)
        except NotGitRepository:
            self._create_repo()

    def _create_repo(self):
        """ create the directory and repository, then make the initial commit """
        if not os.path.exists(self.repo_path):
            try:
                os.makedirs(self.repo_path)
            except OSError as exc:
                # another process may have created the directory meanwhile
                if exc.errno != errno.EEXIST:
                    raise
        Repo.init(self.repo_path)
        self.repo = Repo(self.repo_path)
        self.commit('initial commit')

    def commit(self, message):
        """
        stages files_under_version_controll and commits them

        :param message: str; commit message
        """
        tracked = self.files_under_version_controll
        self.repo.stage(tracked)
        self.repo.do_commit(str.encode(message), str.encode('nextSongs'))

    def get_current_head(self):
        """
        :return: bytes; value of ``repo.head()`` (sha of the current head)
        """
        return self.repo.head()

    def get_commits(self):
        """
        collects every commit yielded by the repository walker

        :return: iterator over dulwich.objects.Commit, newest author time first
        """
        found = [entry.commit for entry in self.repo.get_walker()]
        found.sort(
            key=lambda c: datetime.datetime.fromtimestamp(c.author_time))
        return reversed(found)

    def restore(self, commit):
        """
        hard-resets to a given commit, commits the result and reloads config

        :param commit: dulwich.objects.Commit; commit to reset to
        """
        target = str.encode(commit.sha().hexdigest())
        porcelain.reset(self.repo, 'hard', target)
        self.commit("Restored setting and data.")
        Config.read_config()
def do_import(commits, repo_loc, overwrite=True, author_="Règlement général <*****@*****.**>"):
    """Replay ``commits`` into a git repository at ``repo_loc``, tagging each one.

    :param commits: sequence of tuples ``(date, items[, main])``; ``date`` is
        used for the commit timestamp, message and tag name, and the tuple is
        passed to ``create_tree`` to write the working-tree files.
    :param repo_loc: repository directory. An existing directory is deleted
        and recreated when ``overwrite`` is true, otherwise reopened.
    :param overwrite: whether an existing ``repo_loc`` is replaced.
    :param author_: identity used as committer and tagger.
    """
    if exists(repo_loc) and not overwrite:
        repo = Repo(repo_loc)
    else:
        if exists(repo_loc):
            print("Deleting existing output directory: %s" % repo_loc)
            shutil.rmtree(repo_loc)
        os.mkdir(repo_loc)
        repo = Repo.init(repo_loc)

    # Close the repository even when an import step fails.
    try:
        print("Importing %d commit(s)" % len(commits))
        fs_encoding = sys.getfilesystemencoding()
        author = bytes(author_, "UTF-8")

        for i, commit in enumerate(commits):
            date = commit[0]
            print("Commit %d dated %s, %d items" % (i, str(date), len(commit[1])))
            print("  authored by %s" % author_)
            paths_added, paths_removed = create_tree(
                commit, repo_loc, readme=False,
                main=commit[2] if len(commit) == 3 else {})
            repo.stage([path.encode(fs_encoding) for path in set(paths_added)])

            index = repo.open_index()
            print("  Removing %d files" % len(paths_removed))
            for p in paths_removed:
                del index[p.encode(fs_encoding)]
            index.write()

            # "%z" yields e.g. "+0100"; int() gives 100 and *36 turns that
            # into 3600 seconds. Only exact for whole-hour offsets.
            tz_offset = int(TZ_PARIS.localize(date).strftime("%z")) * 36

            repo.do_commit(
                bytes("Version du %s" % date.strftime(FMT), "UTF-8"),
                committer=author,
                commit_timestamp=date.timestamp(),
                commit_timezone=tz_offset)

            # create an annotated tag pointing at the commit just made
            tag_name = bytes(date.strftime(ISO_8601), "UTF-8")
            head = parse_object(repo, "HEAD")
            tag = Tag()
            tag.tagger = author
            tag.name = tag_name
            tag.message = b''
            tag.object = (type(head), head.id)
            tag.tag_time = int(time.time())
            tag.tag_timezone = tz_offset
            repo.object_store.add_object(tag)
            repo.refs[b'refs/tags/' + tag_name] = tag.id
    finally:
        repo.close()
def makeRepo(self, tree_contents):
    """Populate the repository at ``self.repository_path`` with one commit.

    :param tree_contents: iterable of ``(filename, contents)`` pairs; each
        becomes a regular file (mode 0644) in the root tree of the commit.
    """
    repo = GitRepo(self.repository_path)
    blobs = [(Blob.from_string(contents), filename)
             for (filename, contents) in tree_contents]
    repo.object_store.add_objects(blobs)
    # 0o644 is accepted by Python 2.6+ and 3; the bare 0644 form is a
    # syntax error on Python 3.
    root_id = dulwich.index.commit_tree(
        repo.object_store,
        [(filename, b.id, stat.S_IFREG | 0o644) for (b, filename) in blobs])
    repo.do_commit(committer='Joe Foo <*****@*****.**>',
                   message=u'<The commit message>',
                   tree=root_id)
def GitSave(self, commentStr):
    """Stage and commit every regular file in ``self.iscsiconfdir``.

    :param commentStr: commit message, also used in log output.
    :return: ``1`` on success, ``-1`` if staging or committing failed (the
        traceback is logged).
    """
    try:
        repo = Repo(self.iscsiconfdir)
        filelist = [
            f for f in listdir(self.iscsiconfdir)
            if isfile(join(self.iscsiconfdir, f))
        ]
        repo.stage(filelist)
        repo.do_commit(commentStr)
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt/SystemExit must not
        # be reported as a git failure.
        var = format_exc()
        logger.error("During GitSave %s: Git save error: %s" % (commentStr, var))
        return -1
    return 1
def makeRepo(self, repository_name, tree_contents):
    """Create a repository under ``self.repository_store`` holding one commit.

    :param repository_name: directory name of the new repository.
    :param tree_contents: iterable of ``(filename, contents)`` pairs stored
        as regular files (mode 0644) in the root tree of the commit.
    """
    repository_path = os.path.join(self.repository_store, repository_name)
    os.makedirs(repository_path)
    self.createRepository(repository_path, bare=self._use_server)
    repo = GitRepo(repository_path)

    blob_pairs = []
    for filename, contents in tree_contents:
        blob_pairs.append((Blob.from_string(contents), filename))
    repo.object_store.add_objects(blob_pairs)

    tree_entries = [(filename, blob.id, stat.S_IFREG | 0o644)
                    for blob, filename in blob_pairs]
    root_id = dulwich.index.commit_tree(repo.object_store, tree_entries)
    repo.do_commit(committer='Joe Foo <*****@*****.**>',
                   message=u'<The commit message>',
                   tree=root_id)
def writefile(namespacepath, path, data):
    """
    Writes data to a file inside a namespace and commits it to git.

    @param namespacepath: directory of the namespace (a git repository)
    @param path: file path relative to namespacepath
    @param data: text to write to the file
    @return: tuple (success, message); success is True or False
    """
    fullpath = "%s/%s" % (namespacepath, path)

    # Write the data to the file; the context manager closes the handle even
    # when the write itself fails.
    try:
        with open(fullpath, 'w') as handle:
            handle.write(data)
    except Exception:
        return (False, "Could not write file %s" % fullpath)

    # Now add it to git.
    try:
        repo = Repo(namespacepath)
        repo.stage(path)
        # Obviously, we'll want to get this commit info from somewhere else.
        commit_id = repo.do_commit(
            "An API commit",
            committer="API Committer <*****@*****.**>")
    except Exception:
        # The previous code referenced an undefined name here, turning every
        # commit failure into a NameError.
        return (False, "Could not commit file %s to namespace %s" %
                (path, namespacepath))
    return (True, "Commited as %s" % commit_id)
def commit(repo: Repo, msg: str) -> str:
    """Re-stage every indexed file whose content changed, then commit.

    For each index entry the working-tree file is hashed again; entries
    whose blob id no longer matches the index are staged before committing.
    Returns whatever ``repo.do_commit`` returns.
    """
    index = repo.open_index()
    for tree_path, entry in index.items():
        on_disk = os.path.join(repo.path.encode(), tree_path)
        current = blob_from_path_and_stat(on_disk, os.lstat(on_disk))
        if current.id == entry.sha:
            continue
        repo.stage(tree_path)
    committer = b"Source{d} ML Team <*****@*****.**>"
    return repo.do_commit(msg.encode(), committer)
def _dulwich_commit(self, author, message=DEFAULT_COMMIT_MSG):
    """
    Commit staged files in the repo at ``self.config['top_dir']``.

    Raises SartorisError (exit code 14) if HEAD does not point at the new
    commit afterwards.
    """
    repo = Repo(self.config['top_dir'])
    new_commit = repo.do_commit(message, committer=author)
    if repo.head() == new_commit:
        return
    raise SartorisError(message=exit_codes[14], exit_code=14)
def GitSave(self, vguuid, commentStr):
    """Download the target's config files and commit them to the git repository.

    Fetches ``/temp/scst.conf`` and ``/temp/<vguuid>`` through ``self.srv``
    into ``self.iscsiconfdir`` and then stages and commits every regular file
    in that directory. Failures are logged, never raised.

    :param vguuid: volume-group identifier; names the remote file and the
        local ``.lvm`` copy.
    :param commentStr: commit message, also used in log output.
    :return: None
    """
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        self.srv.get('/temp/scst.conf',
                     self.iscsiconfdir + self.serverDNS + '.scst.conf')
        self.srv.get('/temp/' + vguuid,
                     self.iscsiconfdir + self.serverDNS + '.' + vguuid + '.lvm')
    except Exception:
        var = format_exc()
        logger.error("During GitSave: %s: PYSFTP download error: %s" %
                     (commentStr, var))
        return

    try:
        repo = Repo(self.iscsiconfdir)
        filelist = [
            f for f in listdir(self.iscsiconfdir)
            if isfile(join(self.iscsiconfdir, f))
        ]
        repo.stage(filelist)
        repo.do_commit(commentStr)
    except Exception:
        var = format_exc()
        logger.error("During GitSave: %s: Git save error: %s" %
                     (commentStr, var))
def test_last_revision_info(self):
    """last_revision_info() reports revno 2 and the mapped id of the 2nd commit."""
    self.simple_commit_a()
    self.build_tree(['b'])

    git_repo = GitRepo(".")
    git_repo.stage("b")
    second_commit = git_repo.do_commit(
        b"b", committer=b"Somebody <*****@*****.**>")

    branch = Branch.open('.')
    expected = (2, default_mapping.revision_id_foreign_to_bzr(second_commit))
    self.assertEqual(expected, branch.last_revision_info())
def commit(root, path, author):
    """Stage ``path`` in the repository at ``root`` and commit it.

    The commit is stamped with the current time at UTC-02:00, while the
    author timestamp is taken from ``os.path.getctime`` of the file.

    :return: the value returned by ``Repo.do_commit``.
    """
    repo = Repo(root)
    repo.stage([path])
    commit_options = dict(
        committer='Git-dropbox',
        author=author,
        commit_timestamp=int(time()),
        commit_timezone=parse_timezone('-0200')[0],
        author_timestamp=os.path.getctime(os.path.join(root, path)),
        encoding='UTF-8',
    )
    return repo.do_commit('Automated commit', **commit_options)
def commit(root, path, author):
    """Stage ``path`` in the repository at ``root`` and commit it.

    The commit is stamped with the current time at UTC-02:00, while the
    author timestamp is taken from ``os.path.getctime`` of the file.

    :return: the value returned by ``Repo.do_commit``.
    """
    repo = Repo(root)
    repo.stage([path])
    commit_options = dict(
        committer='Git-dropbox',
        author=author,
        commit_timestamp=int(time()),
        commit_timezone=parse_timezone('-0200')[0],
        author_timestamp=os.path.getctime(os.path.join(root, path)),
        encoding='UTF-8',
    )
    return repo.do_commit('Automated commit', **commit_options)
def dulwichCommit(self, filePath, fullPath, kind):
    """Stage ``filePath`` in the autogit repository and commit if needed.

    A commit is always made when the repository has no HEAD yet; otherwise
    one is made only when the index differs from the HEAD tree.

    :param filePath: path to stage (converted with ``str``).
    :param fullPath: unused.
    :param kind: label interpolated into the commit message.
    :return: the value returned by ``do_commit``, or ``None`` when there was
        nothing to commit.
    """
    repo = Repo(AUTOGIT_PATH)
    repo.stage(map(str, [filePath]))
    index = repo.open_index()

    try:
        identity = repo._get_user_identity()
    except ValueError:
        identity = "autogit"

    def make_commit():
        return repo.do_commit('%s - autogit commit (via dulwich)' % kind,
                              committer=identity)

    # A missing HEAD means this is the very first commit.
    try:
        repo.head()
    except KeyError:
        return make_commit()

    pending = list(tree_changes(repo,
                                index.commit(repo.object_store),
                                repo['HEAD'].tree))
    if not pending:
        return None
    return make_commit()
class GitStore(FileStore):
    '''Git versioned filesystem based object storage frontend.

    Every item write or removal is staged and committed immediately.
    '''

    init = 'git://'

    def __init__(self, engine, **kw):
        super(GitStore, self).__init__(engine, **kw)
        # Open the repository in the store directory, creating it if needed.
        try:
            repo = Repo(self._dir)
        except NotGitRepository:
            repo = Repo.init(self._dir)
        self._repo = repo

    def __setitem__(self, key, value):
        super(GitStore, self).__setitem__(key, value)
        self._record(key, 'added {0}', committer='shove')

    def __delitem__(self, key):
        super(GitStore, self).__delitem__(key)
        self._record(key, 'removed {0}')

    def _record(self, key, template, **commit_options):
        '''Stage the file backing ``key`` and commit with a formatted message.'''
        fname = quote_plus(key)
        self._repo.stage([fname])
        self._repo.do_commit(template.format(fname), **commit_options)
class GitStore(FSStore):
    """File-system store whose writes and deletions are staged in git.

    With ``autocommit`` enabled every ``put``/``delete`` also creates a
    commit; otherwise changes are only staged until ``commit`` is called.
    """

    __slots__ = ("_autocommit", "_repo")

    author = "GitStore <git@indicium>"

    def __init__(self, path=".", extension=".data", autocommit=True):
        super(GitStore, self).__init__(path, extension)
        self._autocommit = autocommit
        # Reuse an existing repository when a .git directory is present.
        has_git_dir = P.isdir(P.join(self._path, ".git"))
        self._repo = Repo(self._path) if has_git_dir else Repo.init(self._path)

    def commit(self, message, author=None):
        """Commit what is staged; ``author`` defaults to the class identity."""
        message += "\n\nCommitted by indicium.git.GitStore"
        identity = (self.author if author is None else author).encode()
        self._repo.do_commit(committer=identity,
                             author=identity,
                             message=message.encode())

    def put(self, key, value):
        super(GitStore, self).put(key, value)
        self._repo.stage([self.path_for_key(key)])
        if not self._autocommit:
            return
        self.commit("put: {!s}".format(normalize(key)))

    def delete(self, key):
        path = self.path_for_key(key)
        # Nothing to do when the backing file does not exist.
        if P.exists(P.join(self._path, path)):
            super(GitStore, self).delete(key)
            self._repo.stage([path])
            if self._autocommit:
                self.commit("delete: {!s}".format(normalize(key)))
class GitStore(FileStore):
    """Git versioned filesystem based object storage frontend.

    Every item write or removal is staged and committed immediately.
    """

    init = "git://"

    def __init__(self, engine, **kw):
        super(GitStore, self).__init__(engine, **kw)
        # Open the repository in the store directory, creating it if needed.
        try:
            repo = Repo(self._dir)
        except NotGitRepository:
            repo = Repo.init(self._dir)
        self._repo = repo

    def __setitem__(self, key, value):
        super(GitStore, self).__setitem__(key, value)
        self._record(key, "added {0}", committer="shove")

    def __delitem__(self, key):
        super(GitStore, self).__delitem__(key)
        self._record(key, "removed {0}")

    def _record(self, key, template, **commit_options):
        """Stage the file backing ``key`` and commit with a formatted message."""
        fname = quote_plus(key)
        self._repo.stage([fname])
        self._repo.do_commit(template.format(fname), **commit_options)
def test_normal_from_repo(self):
    """get_source_revision() returns the first 10 characters of the HEAD sha."""
    # Open the repository in the working directory, creating one if absent.
    cwd = Path.cwd()
    try:
        repo = Repo(str(cwd))
    except NotGitRepository:
        repo = Repo.init(str(cwd))

    try:
        head_sha = repo.head().decode()
        self.original_revision = head_sha
    except KeyError:
        # No commit yet: create one so that HEAD exists.
        FILE_NAME_TEST = 'file_test.txt'
        (cwd / FILE_NAME_TEST).touch()
        repo.stage(FILE_NAME_TEST.encode())
        head_sha = repo.do_commit(b'Test commit').decode()

    reported = get_source_revision()
    assert reported == head_sha[:10]
def do_commit(request):
    """Write the posted wiki page content to disk and commit it.

    The page file is located as ``<wiki.root>/<project>/<page>.*`` (first
    match), locked exclusively, and overwritten only when the file is new to
    git or its last commit id equals the ``lastcommitid`` form field.

    :param request: request providing ``matchdict`` (``project``, ``page``),
        ``POST`` (``content``, ``lastcommitid``) and ``registry.settings``.
    :raises RuntimeError: if the file was committed by someone else since the
        edit form was generated.
    :raises IndexError: if no file matches the page name.
    """
    project = request.matchdict['project']
    page = request.matchdict['page']
    content = request.POST["content"]

    # construction of the wiki path
    wikiroot = request.registry.settings['wiki.root']  # from settings in .ini file
    wikipath = os.path.join(wikiroot, project)  # project name is the git repository name
    rootfilepath = os.path.join(wikipath, page)
    files = glob.glob(rootfilepath + ".*")  # list files with any extension
    log.debug(files)
    # first matching file; undetermined result if two files differ only by extension
    f = files[0]

    # Open WITHOUT truncating: with mode "w" the page was emptied before the
    # conflict check, so a rejected edit destroyed the existing content.
    handle = open(f, "r+b")
    try:
        fcntl.lockf(handle, fcntl.LOCK_EX)  # exclusive lock on the opened file
        repo = Repo(wikipath)
        # filename relative to wikipath (ie subdirectory/file.wiki)
        filename_relative_to_wiki = os.path.relpath(f, wikipath).encode("ascii")
        commit_id = get_last_commit_id(repo, filename_relative_to_wiki)
        log.debug("last commit id is %s and post['lastcommitid'] is %s" %
                  (commit_id, request.POST['lastcommitid']))
        log.debug(commit_id == request.POST['lastcommitid'])

        # is it a new file or is this file unchanged since form generation?
        if commit_id is not None and commit_id != request.POST['lastcommitid']:
            raise RuntimeError("file %s was changed before the commit" % f)

        userinfos = get_user_infos(request)
        # Encode first so an encoding error cannot leave a truncated file.
        payload = content.encode('utf-8')
        handle.seek(0)
        handle.truncate()
        handle.write(payload)
        handle.flush()
        repo.stage([filename_relative_to_wiki])
        log.debug("staging %s" % filename_relative_to_wiki)
        rep = repo.do_commit("edited online with pygiwi",
                             committer="%(name)s <%(email)s>" % userinfos)
        log.debug("commit anwser is: %s" % rep)
        log.debug('wrote new content to file: %s ' % f)
    finally:
        handle.close()  # closing the file releases the lock, also on errors
def cmd_new(args, config):
    """Create a new issue on the dotissue branch and exit the process.

    When ``args.title`` is empty, ``$EDITOR`` (default ``vim``) is launched on
    a temporary file and its content becomes the title. The title is stored
    as a ``_title`` blob in a new subtree that is added to the branch tree
    under the blob's id, and a commit is created on ``refs/heads/DI_BRANCH``.

    :param args: parsed arguments providing ``path`` and ``title``.
    :param config: unused.
    :raises SystemExit: always; with an error message when ``args.path`` is
        not a git repository or dotissue is not initialised, else status 0.
    """
    try:
        repo = Repo(args.path)
    except NotGitRepository:
        sys.exit('It does not look like a valid repository.')

    if DI_BRANCH not in get_branch_list(repo):
        sys.exit('Not initialized by dotissue. Use init first.')

    if not args.title:
        EDITOR = os.environ.get('EDITOR', 'vim')
        initial_message = "<Issue title here>"
        # Text mode so the str placeholder can be written on Python 3 too.
        with tempfile.NamedTemporaryFile(mode='w+', suffix=".tmp") as msgfile:
            msgfile.write(initial_message)
            msgfile.flush()
            call([EDITOR, msgfile.name])
            # Re-open by name to read what the editor saved.
            with open(msgfile.name, 'r') as f:
                args.title = f.read()

    title = args.title.strip()
    object_store = repo.object_store
    tree = repo[repo['refs/heads/%s' % DI_BRANCH].tree]

    tree_issue = Tree()
    # NOTE(review): Blob.from_string presumably needs bytes on Python 3
    # dulwich; title is passed through unchanged here, as before.
    blob = Blob.from_string(title)
    # 0o... octal literals are valid on Python 2.6+ and 3.
    tree_issue.add("_title", 0o100644, blob.id)
    object_store.add_object(blob)
    tree.add(blob.id, 0o040000, tree_issue.id)
    object_store.add_object(tree_issue)
    object_store.add_object(tree)

    msg = (title[:60] + '..') if len(title) > 60 else title
    commit = repo.do_commit("New issue: %s" % msg,
                            commit_timezone=-timezone,
                            tree=tree.id,
                            ref='refs/heads/%s' % DI_BRANCH)
    print('Issue created : %s' % commit)
    sys.exit(0)
def cmd_init(args, config):
    """Create the dotissue branch with an initial README commit and exit.

    :param args: parsed arguments providing ``path``.
    :param config: unused.
    :raises SystemExit: always; with an error message when ``args.path`` is
        not a git repository or the branch already exists, else status 0.
    """
    try:
        repo = Repo(args.path)
    except NotGitRepository:
        sys.exit('It does not look like a valid repository.')

    if DI_BRANCH in get_branch_list(repo):
        sys.exit('Already initialized.')

    object_store = repo.object_store
    tree = Tree()
    # NOTE: separate README-bearing subtrees for DI_ISSUES / DI_LABELS were
    # sketched here in commented-out code; only the root README is created.
    blob = Blob.from_string("This is a branch for dotissue.\n")
    # 0o... octal literals are valid on Python 2.6+ and 3.
    tree.add("README.md", 0o100644, blob.id)
    object_store.add_object(blob)
    object_store.add_object(tree)

    commit = repo.do_commit("Initial commit",
                            commit_timezone=-timezone,
                            tree=tree.id,
                            ref='refs/heads/%s' % DI_BRANCH)
    print('Initialized by %s. :^D' % commit)
    sys.exit(0)
class Wiki(HookMixin):
    """A wiki backed by a git repository at ``path``.

    The repository is opened on construction and created (including the
    directory) when ``path`` is not a git repository yet.
    """

    # Class-level defaults; ``path`` and ``repo`` are set per instance in
    # ``__init__``.
    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at ``path``, initialising it if necessary."""
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        ``name``, ``email`` and ``message`` are UTF-8 encoded when they are
        ``unicode``; the same ``"name <email>"`` identity is used as author
        and committer.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: the value returned by ``Repo.do_commit``
        """
        print "commit"
        if isinstance(name, unicode):
            name = name.encode('utf-8')
        if isinstance(email, unicode):
            email = email.encode('utf-8')
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        author = committer = "%s <%s>" % (name, email)
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    # NOTE(review): defined inside the class without a ``self`` parameter, so
    # on an instance call the instance is bound to ``commit``.
    def push(commit, approved_by):
        """Run ``git pull``, a rebase and a push of ``commit`` via subprocess.

        Prints ``commit`` and the output of every command.

        :param commit: commit identifier concatenated into the git arguments
        :param approved_by: unused
        """
        print commit
        output = subprocess.check_output(["git", "pull"])
        print output
        # NOTE(review): "rebase -i <commit>" is passed as ONE argv element,
        # presumably not what was intended — confirm.
        output = subprocess.check_output(["git", "rebase -i " + commit])
        print output
        # NOTE(review): this tries to execute a program named ":x"; it looks
        # like an editor keystroke rather than a command — confirm.
        output = subprocess.check_output([":x"])
        print output
        # NOTE(review): the push arguments are also a single argv element.
        output = subprocess.check_output(
            ["git", "push origin " + commit + ":master"])
        print output

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: a ``WikiPage`` built from ``name``, this wiki and ``sha``
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts with the keys ``name``, ``filename``,
            ``ctime``, ``mtime``, ``sha`` and ``size``, one per index entry
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     # ctime/mtime are indexable; only element 0 is kept
                     ctime=index[name].ctime[0],
                     mtime=index[name].mtime[0],
                     sha=index[name].sha,
                     size=index[name].size))

        return rv
class Store(object):
    """
    A simple key/value store using git as the backing store.

    Keys are slash-separated paths inside the tree of a branch; values are
    serialized with ``serializer`` (``json`` by default) and stored as blobs.
    Mutating operations hold ``self.lock`` (a re-entrant lock).
    """

    def __init__(self, id, repo_path, serializer=None):
        """
        :param id: Identifier of this store (used by ``_head_cache_key``).
        :param repo_path: Path of an existing git repository.
        :param serializer: Object providing ``dumps``/``loads``; defaults to json.
        :raises ValueError: If ``repo_path`` does not exist.
        """
        self.id = id
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer
        self.lock = threading.RLock()

    def gc(self):
        """
        Run ``git gc --auto`` in the repository when a ``git`` executable is
        found, then re-open the repository. A failing gc is logged, not raised.
        """
        with self.lock:
            if which('git'):
                repo_dir = self.repo.path
                try:
                    log.info("starting gc on repo %s" % repo_dir)
                    subprocess.check_call("git gc --auto", cwd=repo_dir, shell=True)
                    log.info("finished gc on repo %s" % repo_dir)
                    # re-open the repository after gc has run
                    self.repo = Repo(self.repo.path)
                except subprocess.CalledProcessError:
                    log.exception("git gc failed for repo %s" % repo_dir)

    def create_branch(self, branch, parent=None):
        """
        Create the ref for ``branch`` if it does not exist yet.

        :param branch: Branch name (with or without ``refs/heads/``).
        :param parent: Sha the new ref points to; defaults to the head of master.
        :return: dict with the key ``sha`` holding the branch head.
        """
        with self.lock:
            if not parent:
                parent = self.branch_head('master')
            branch_ref = self._branch_ref_name(branch)
            self.repo.refs.add_if_new(branch_ref, parent)
            return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None, committer=None):
        """
        Apply the tree differences of ``source_branch`` onto ``target_branch``
        and record them as a merge commit on the target.

        Added and modified entries are copied to the target tree, deleted
        entries are removed; copy, rename and unchanged changes are ignored.

        :return: dict with the key ``sha`` holding the new commit id.
        :raises ValueError: If both branch names are equal.
        """
        with self.lock:
            if source_branch == target_branch:
                raise ValueError("Cannot merge branch with itself %s" % source_branch)
            target_tree = self._get_object(ROOT_PATH, target_branch)
            branch_tree = self._get_object(ROOT_PATH, source_branch)
            for tc in diff_tree.tree_changes(self.repo.object_store, target_tree.id, branch_tree.id):
                if tc.type == diff_tree.CHANGE_ADD:
                    self._add_tree(target_tree, ((tc.new.path, tc.new.sha, tc.new.mode),))
                if tc.type == diff_tree.CHANGE_COPY:
                    pass
                if tc.type == diff_tree.CHANGE_DELETE:
                    target_tree = self._delete(tc.old.path, target_branch)
                if tc.type == diff_tree.CHANGE_MODIFY:
                    self._add_tree(target_tree, ((tc.new.path, tc.new.sha, tc.new.mode),))
                if tc.type == diff_tree.CHANGE_RENAME:
                    pass
                if tc.type == diff_tree.CHANGE_UNCHANGED:
                    pass
            msg = "Merge %s to %s" % (source_branch, target_branch)
            merge_heads = [self.branch_head(source_branch)]
            sha = self.repo.do_commit(
                tree=target_tree.id,
                message=msg,
                ref=self._branch_ref_name(target_branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Get a tree or blob from the store by key. The key param can be paths such as 'a/b/c'.
        If the key requested represents a Tree in the git db, then a document will be
        returned in the form of a python dict. If the key requested represents a Blob
        in the git db, then the deserialized blob content will be returned.

        :param key: The key to retrieve from the store
        :param shallow: If true, only one level below the key is included
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; defaults to the head of ``branch``
        :return: The deserialized blob, a dict for a tree (with an extra
            ``commit_sha`` entry), or None if the key does not exist
        """
        if not commit_sha:
            commit_sha = self.branch_head(branch)
        obj = self._get_object(key, branch, commit_sha)
        if obj:
            if isinstance(obj, Blob):
                return self.serializer.loads(obj.data)
            elif isinstance(obj, Tree):
                keys = key.split('/')
                # depth of the requested key = number of non-empty path parts
                min_level = len(filter(None, keys))
                if shallow:
                    max_level = min_level+1
                else:
                    max_level = sys.maxint
                tree = self.trees(key, min_level=min_level, max_level=max_level, branch=branch, commit_sha=commit_sha)
                # descend from the root-anchored dict to the requested subtree
                if keys != [ROOT_PATH]:
                    for k in keys:
                        tree = tree[k]
                tree['commit_sha'] = commit_sha
                return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None, bypass_head_cache=True):
        """
        Look up the git object stored at ``key`` in the tree of ``commit_sha``
        (default: head of ``branch``).

        :return: The object, or None when the lookup raises KeyError or
            NotTreeError. ``bypass_head_cache`` is not used.
        """
        try:
            if not commit_sha:
                commit_sha = self.branch_head(branch)
            (mode, sha) = tree_lookup_path(self.repo.get_object, self._repo_tree(commit_sha), key)
            return self.repo[sha]
        except KeyError:
            return None
        except NotTreeError:
            return None

    def diff(self, old_sha, new_sha=None):
        """Show the changes between OLD_SHA and NEW_SHA

        If NEW_SHA is not set, it will default to the head of master.
        The result maps each change type to a list with one item per change.

        :param old_sha: parent commit's sha
        :param new_sha: another sha, defaults to the head of master
        :retval: dict
        """
        orig = self._get_object(ROOT_PATH, commit_sha=old_sha)
        new = self._get_object(ROOT_PATH)
        if new_sha:
            new = self._get_object(ROOT_PATH, commit_sha=new_sha)
        # NOTE(review): this mapping is never read below.
        keys = {
            diff_tree.CHANGE_DELETE: 'delete',
            diff_tree.CHANGE_ADD: 'add',
            diff_tree.CHANGE_MODIFY: 'modify'}
        out = defaultdict(list)
        for change_tree in diff_tree.tree_changes(self.repo.object_store, orig.id, new.id, want_unchanged=False):
            if change_tree.type.lower() == "delete" and change_tree.old.path:
                # if the change was a delete, we have no tree or blob to yield so return key with no value
                # return in the same type of structure for consistency
                out[change_tree.type].append([(change_tree.old.path, None)])
            else:
                out[change_tree.type].append(filter(None, self.entries(change_tree.new.path)))
        return out

    def put(self, key, value, flatten_keys=True, branch='master', author=None, committer=None, overwrite=False):
        """
        Add/Update one or many key value pairs in the store.
        When ``value`` is a dict and ``flatten_keys`` is true, it is flattened
        into nested key paths and every leaf is stored as its own blob.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Whether ``{key: value}`` is passed through ``flatten``
        :param branch: Branch to commit to
        :param overwrite: If true, existing entries below ``key`` that are
            not part of ``value`` are deleted
        :return: dict with the key ``sha`` holding the new commit id
        """
        with self.lock:
            e = {key: value}
            if flatten_keys:
                e = flatten(e)
            root_tree = self._get_object(ROOT_PATH, branch)
            merge_heads = []
            # branch has no tree yet: start from master and record it as merge head
            if not root_tree:
                root_tree = self._get_object(ROOT_PATH)
                merge_heads = [self.branch_head('master')]
            blobs=[]
            msg = ''
            existing_obj = None
            if type(value) == types.DictType:
                try:
                    existing_obj = self.get(key, shallow=True, branch=branch)
                except:
                    pass
                if existing_obj:
                    if 'commit_sha' in existing_obj:
                        del existing_obj['commit_sha']
                    existing_obj = flatten({key: existing_obj})
            for (k, value) in e.iteritems():
                blob = Blob.from_string(self.serializer.dumps(value))
                self.repo.object_store.add_object(blob)
                blobs.append((k, blob.id, stat.S_IFREG))
                if existing_obj and k in existing_obj:
                    # only changed values are mentioned in the commit message
                    if existing_obj[k] != value:
                        msg += "Put %s\n" % k
                    # whatever remains in existing_obj afterwards was not re-put
                    del existing_obj[k]
                else:
                    msg += "Put %s\n" % k
            if overwrite and existing_obj:
                for k in existing_obj:
                    self.delete(k, branch=branch)
                # the deletes created commits; reload the tree
                root_tree = self._get_object(ROOT_PATH, branch)
            root_id = self._add_tree(root_tree, blobs)
            sha = self.repo.do_commit(
                tree=root_id,
                message=msg,
                ref=self._branch_ref_name(branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Delete one or more entries from the store. The key param can refer to either
        a Tree or Blob in the store. If it refers to a Blob, then just that entry will be
        removed. If it refers to a Tree, then that entire subtree will be removed.

        :param key: The key to remove from the store.
        :return: dict with the key ``sha`` holding the new commit id
        """
        with self.lock:
            tree = self._get_object(key, branch)
            merge_heads = []
            delete_branch = branch
            # key not found on the branch: delete relative to master instead
            if not tree:
                merge_heads = [self.branch_head('master')]
                delete_branch = 'master'
            root = self._delete(key, delete_branch)
            sha = self.repo.do_commit(
                tree=root.id,
                message="Delete %s" % key,
                ref=self._branch_ref_name(branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def _delete(self, key, branch='master'):
        """
        Remove ``key`` from its parent tree and re-add every ancestor tree to
        the object store. No commit is created.

        :return: The updated root Tree.
        """
        trees={}
        path = key
        # collect every ancestor tree of key, up to and including the root
        if path:
            while path:
                (path, name) = pathsplit(path)
                trees[path] = self._get_object(path, branch)
        else:
            trees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        (path, name) = pathsplit(key)
        if name:
            del trees[path][name]
        else:
            # empty name: remove every entry of the tree
            for entry in trees[path].iteritems():
                del trees[path][entry.path]
        # propagate the changed tree ids upwards
        if path:
            while path:
                (parent_path, name) = pathsplit(path)
                trees[parent_path].add(name, stat.S_IFDIR, trees[path].id)
                self.repo.object_store.add_object(trees[path])
                path = parent_path
            self.repo.object_store.add_object(trees[ROOT_PATH])
        else:
            self.repo.object_store.add_object(trees[ROOT_PATH])
        return trees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the tree id of the commit ``commit_sha``."""
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, filter_by=None, branch='master', commit_sha=None):
        """
        Returns a list of keys from the store. The path param can be used to scope the
        request to return keys from a subset of the tree. The filter_by param can be used
        to control whether to return keys for Blob nodes, Tree nodes or all nodes. Default
        is to return all node keys from the root of the store.

        :param path: The starting point retrieve key paths from.
        :param filter_by: Either 'blob', 'tree' or None. Controls what type of node key
                          paths to return. Default is None which returns all node type key paths
        :param branch: The branch name to return key paths for.
        :return: A list of keys in the order produced by ``iteritems``.
        """
        if filter_by == 'blob':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Blob)
        elif filter_by == 'tree':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Tree)
        else:
            filter_fn = None
        return map(lambda x: x[0], filter(filter_fn, self.iteritems(path, pattern, min_level, max_level, depth_first, branch, commit_sha)))

    def entries(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, branch='master', commit_sha=None):
        """Yield ``(key, deserialized value)`` for every Blob found by ``iteritems``."""
        for key, obj in self.iteritems(path, pattern, min_level, max_level, depth_first, branch, commit_sha):
            if isinstance(obj, Blob):
                yield (key, self.serializer.loads(str(obj.data)))

    def iteritems(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, branch='master', commit_sha=None):
        """
        Walk the tree below ``path`` and yield ``(path, git object)`` pairs.

        Only nodes with ``min_level < level <= max_level`` are yielded, where
        level is the number of path components. ``pattern`` is an optional
        compiled regex whose ``match`` must accept the node path.
        """
        # gevent is optional; without it the traversal simply never yields control
        try:
            import gevent
        except:
            gevent = None

        def _node(level, path, node):
            return level, path, node

        root = self._get_object(path, branch=branch, commit_sha=commit_sha)
        bypass_head_cache=False
        level = len(filter(None, path.split('/')))
        if min_level is None:
            min_level = 0
        if max_level is None:
            max_level = sys.maxint
        nodes_to_visit = collections.deque([_node(level, path, root)])
        while len(nodes_to_visit) > 0:
            # allow server to yield to other greenlets during long tree traversals
            if gevent:
                gevent.sleep(0)
            (level, path, node) = nodes_to_visit.popleft()
            if isinstance(node, Tree):
                # NOTE(review): bypass_head_cache is passed as the third
                # positional argument of _tree_entry, which is ``branch``.
                children = filter(lambda child: min_level < child[0] <= max_level,
                                  map(lambda child: _node(level+1, *self._tree_entry(path, child, bypass_head_cache)),
                                      node.iteritems()))
                if depth_first:
                    nodes_to_visit.extendleft(children)
                else:
                    nodes_to_visit.extend(children)
            if min_level < level <= max_level:
                if pattern is not None:
                    if pattern.match(path):
                        yield (path, node)
                else:
                    yield (path, node)

    def trees(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, object_depth=None, branch='master', commit_sha=None):
        """
        Returns a python dict representation of the store. The resulting dict can be
        scoped to a particular subtree in the store with the path param, a string key
        to begin from. The branch param is used to specify the git branch name
        to build the dict from.

        :param path: Option string key to begin building the dict from.
        :param pattern: Regex pattern to filter matching tree paths.
        :param branch: Optional git branch name to return key paths from. Defaults to 'master'.
        :return: A dict represents a section of the store.
        """
        tree = {}
        for key, value in self.entries(path, pattern, min_level, max_level, depth_first, branch, commit_sha):
            expand_tree(key, value, tree, object_depth)
        return tree

    def _head_cache_key(self, key):
        """Return ``"<store id>/<key>"``."""
        return "%s/%s" % (self.id, key)

    def _tree_entry(self, path, tree_entry, branch='master', bypass_head_cache=False):
        """
        Resolve a tree entry to ``(child path, git object)``.
        ``branch`` and ``bypass_head_cache`` are not used.
        """
        child_path = self._tree_entry_key(path, tree_entry)
        obj = None
        if obj is None:
            obj = self.repo[tree_entry.sha]
        return child_path, obj

    def _tree_entry_key(self, path, tree_entry):
        """Join ``path`` and the entry's own path with '/', omitting an empty ``path``."""
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        """Return ``name`` as a full ref name, prefixing ``refs/heads/`` if missing."""
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the sha the branch ref ``name`` currently points to."""
        with self.lock:
            return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs, branch='master', commit_sha=None):
        """
        Write the given blobs into ``root_tree`` (mutating it), creating or
        updating intermediate trees, and add every touched tree to the object
        store. No commit is created.

        :param root_tree: Root tree to add trees to
        :param blobs: Iterable over blob path, sha, mode entries
        :return: SHA1 of the updated root tree.
        """
        # nested dict skeleton of the paths to write; "" is the root
        trees = {"": {}}

        def add_tree(path):
            # return the skeleton dict for path, creating missing parents
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            # materialize the skeleton below path into Tree objects, bottom-up
            if path:
                tree = self._get_object(path, branch=branch, commit_sha=commit_sha)
                if not tree:
                    tree = Tree()
                # a non-tree object at this path is deleted and replaced
                if not isinstance(tree, Tree):
                    self.delete(path, branch=branch)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if type(entry) == dict:
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id
        return build_tree("")
#!/usr/bin/env python3
"""Smoke test for dulwich: open or create ~/.myrepo, stage one file, commit."""
import getpass
import json
import os
import socket
import sys
import traceback

import yaml
from dulwich.repo import Repo

# Make sure the working directory exists, then open or initialise the repo.
repo_path = os.path.realpath(os.path.expanduser('~/.myrepo'))
if not os.path.exists(repo_path):
    os.mkdir(repo_path)

git_dir = '{}/.git'.format(repo_path)
repo = Repo(repo_path) if os.path.exists(git_dir) else Repo.init(repo_path)
yaml.dump(repo, sys.stdout)

# Show the index before anything is staged.
index = repo.open_index()
MSG = f' repo index path={index.path}, index list={list(index)}, '
yaml.dump(MSG, sys.stdout)

# Write a file into the working tree and stage it.
with open(f'{repo_path}/foo', 'wb') as fh:
    _ = fh.write(b"monty1")
repo.stage([b"foo"])

staged_names = [
    name.decode(sys.getfilesystemencoding()) for name in repo.open_index()
]
print(",".join(staged_names))

# A committer could be passed as committer=getpass.getuser().encode().
commit_id = repo.do_commit(b"The first commit")
print(f' commit_id={commit_id}, repo_head = {repo.head()} ')
class GitRepo(object):
    """Wrapper around a dulwich ``Repo`` that checks objects out to disk and commits paths.

    Files and directories written to temporary locations are recorded in
    ``self.temp_persist_files``.
    """

    def __init__(self, path):
        """Open the repository at ``path``, initialising one if none exists there.

        :param path: repository directory; it is created when it does not exist.
        :raises IOError: if ``path`` exists but is not a directory.
        """
        if os.path.exists(path):
            if not os.path.isdir(path):
                raise IOError('Git repository "%s" must be a directory.' % path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist; the second argument asks init to create the
            # directory only when it is missing
            self.repo = Repo.init(path, not os.path.exists(path))
        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit object for ``version``.

        :raises NotCommitError: if ``version`` resolves to a non-commit object.
        """
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        """Return ``"tree"`` or ``"blob"`` for the entry ``name`` in the root tree of ``version``.

        :raises KeyError: if ``name`` is not in the root tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        # tree[name] is indexed as (mode, sha) throughout this class
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        """Write the entry ``name`` to disk and return the output path.

        :param path_type: ``'tree'`` or ``'blob'``; looked up via ``get_type``
            when ``None``.
        :raises TypeError: if ``path_type`` is neither ``'tree'`` nor ``'blob'``.
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)
        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """Write the blob ``blob_sha`` to ``out_fname`` and return the file name.

        When ``out_fname`` is ``None`` a temporary file is created and recorded
        in ``self.temp_persist_files``; otherwise missing parent directories
        are created first.
        """
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, out_suffix=''):
        """Write the blob ``name`` from ``version`` to disk and return its path.

        :raises KeyError: if ``name`` is not in the root tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        return self._write_blob(blob_sha, out_fname, out_suffix)

    def get_dir(self, name, version="HEAD", out_dirname=None, out_suffix=''):
        """Write every blob below the tree ``name`` under ``out_dirname`` and return it.

        A temporary directory is created (and recorded in
        ``self.temp_persist_files``) when ``out_dirname`` is ``None``.

        :raises KeyError: if ``name`` is not in the root tree.
        """
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """Return the sha stored for ``name`` in the root tree of ``version``.

        ``path_type`` is accepted but not used.

        :raises KeyError: if ``name`` is not in the root tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1 << 16):
        """Hash the file ``fname`` as a git blob, reading ``chunk_size`` bytes at a time."""
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            def read_chunk():
                return f.read(chunk_size)
            # The file is opened in binary mode, so EOF is b''. A text ''
            # sentinel never matches on Python 3 and the iterator would never
            # terminate.
            my_iter = chain([head], iter(read_chunk, b''))
            return iter_sha1(my_iter)

    @staticmethod
    def compute_tree_hash(dirname):
        """Return the id of a ``Tree`` built from the files and sub-directories of ``dirname``.

        Entries that are neither a directory nor a regular file are left out.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR  # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """Hash ``path`` as a tree (directory) or a blob (regular file).

        :raises TypeError: if ``path`` is neither a directory nor a regular file.
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit a HEAD walk restricted to ``path`` yields."""
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head], max_entries=1,
                        paths=[path])
        # next() works on both Python 2.6+ and 3; the .next() method is
        # Python 2 only.
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """Stage ``filename`` (relative to the repository), recursing into directories.

        Symbolic links are skipped with a warning.
        """
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # paths are handed to the repository with '/' separators
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage ``filename`` and commit it; return the new commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """Write a ``user.name``/``user.email`` identity into the repo config if either is missing."""
        config_stack = self.repo.get_config_stack()
        try:
            config_stack.get(('user', ), 'name')
            config_stack.get(('user', ), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user', ), 'name', user)
            repo_conf.set(('user', ), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
class GitStorage():
    """Git-backed storage: stages and commits files and rebuilds older versions on disk."""

    def _ignoreFile(self, dirName, fileName):
        """Callback handed to ``shutil.copytree`` as its ``ignore`` argument.

        ``dirName`` the working directory
        ``fileName`` list of files inside the directory (dirName)

        Returns ``dirName + name`` for every name whose concatenated path is
        not listed in the module-level ``fileToIgnore``.
        """
        # NOTE(review): copytree expects the *names to skip*; this returns
        # concatenated paths of the entries NOT in fileToIgnore. Confirm the
        # intended direction before relying on it.
        result = []
        for i in fileName:
            path = dirName + i
            if path not in fileToIgnore:
                result.append(path)
        return result

    def _commit(self, tree):
        """Commit a tree directly and point HEAD at it; used only by ``__init__``.

        ``tree`` tree to commit

        Returns the id of the created commit.
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (
            pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('commiting')
        return commit.id

    def __init__(self, path):
        """Open ``path`` as a repository, or create it with an initial empty-tree commit."""
        # creating the repository
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    # only this function will be used on upload
    def change(self, files):
        """Stage every entry of ``files`` and commit them; does nothing for an empty list.

        ``files`` a list of file to change
        """
        if not files:
            log.debug("trying to change nothing will do... nothing")
        else:
            log.debug("this will change %i files" % (len(files)))
            for f in files:
                self.repo.stage(str(f))
            log.debug("stages dones")
            self.repo.do_commit(
                "this is so awesome that nobody will never see it",
                committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """Write the entries of ``tree`` that are not in ``originalTree`` below ``dest``.

        ``tree`` the tree to compare; defaults to the tree of HEAD's first parent
        ``dest`` destination folder, appended to the repository location
        ``originalTree`` the reference tree; defaults to the tree of HEAD

        The repository is first copied to the destination (unless it already
        exists), then every differing blob is written into it.
        """
        if tree is None:
            head = self.repo.commit(
                self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(
                self.repo.commit(self.repo.head()).tree)

        # getting blob that have changed
        blobToBuild = []
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)

        # NOTE(review): location is still parsed out of str(self.repo);
        # presumably the repo object exposes the path directly - confirm.
        repoLocation = os.path.join(str(self.repo).split("'")[1])
        target = repoLocation + dest

        # creating the folder with link to older files
        if os.path.exists(target):
            # the message has a %s placeholder that was never filled in
            log.warning("%s already exist, copy will not work" % target)
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation, target, symlinks=True,
                            ignore=self._ignoreFile)

        for b in blobToBuild:
            # os.path.split() returns a tuple, which has no pop() and cannot be
            # passed to os.path.join(); the directory part is what is needed.
            fileDirectory = os.path.dirname(b.path)
            targetDirectory = os.path.join(target, fileDirectory)
            if not os.path.exists(targetDirectory):
                os.makedirs(targetDirectory)
            # raw blob bytes: binary mode, and closed even if the write fails
            with open(os.path.join(target, b.path), 'wb') as out:
                out.write(self.repo.get_object(b.sha).as_raw_string())

    # get*
    def getLastTree(self):
        """Return the tree of the HEAD commit."""
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """Return the ``tree`` attribute of every parent of the HEAD commit."""
        result = []
        commit = self.repo._commit(self.repo.head())
        for c in commit._get_parents():
            result.append(c.tree)
        return result

    def getOlderFileContent(self, file):
        """Return the content of an older version of ``file``, or ``""`` when none is found.

        ``file`` the file to work on
        """
        with open(file) as f:
            originalBlob = Blob.from_string(f.read())

        # Must start as None: the check below relies on it, and the loop may
        # never assign (no parent trees, or every tree contains the blob).
        tree = None
        for t in self.getAllTrees():
            # parsing tree in order to find the tree where the file change
            if originalBlob not in t:
                tree = t
                break

        # tree must be existent, other way file is not correct
        if tree is None:
            log.error(
                "there is no tree that contain this blob this souldn't happen, other way this file does not appear to come from this package")
        else:
            if self.repo._commit(self.repo.head()).tree == tree:
                olderTree = self.repo.commit(
                    self.repo.head())._get_parents()[0].tree
            else:
                # Default to "no older version" so a parent list without a
                # match cannot leave olderTree unbound.
                olderTree = tree
                for c in self.repo._commit(self.repo.head())._get_parents():
                    if c.tree == tree:
                        try:
                            olderTree = c.get_parents()[0]
                        except IndexError:
                            log.debug("file is the last version")
                            olderTree = tree
            if olderTree != tree:
                # we must check here the blob that contains the older file
                # NOTE(review): relies on originalBlob.path and olderTree.id
                # being available - verify against the dulwich version in use.
                for b in self.repo.object_store.iter_tree_contents(
                        olderTree.id):
                    if originalBlob.path == b.path:
                        # older blob found; the first loop already checked
                        # that the contents differ, so return it directly
                        return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """Return the parents of the HEAD commit."""
        return self.repo.commit(self.repo.head())._get_parents()
class Store(object):
    """ A simple key/value store using git as the backing store.

    Mutating operations and ref lookups are wrapped in ``self.lock``
    (a ``threading.RLock``).
    """

    def __init__(self, id, repo_path, serializer=None):
        """
        :param id: Identifier of this store (used by ``_head_cache_key``).
        :param repo_path: Path of an existing git repository.
        :param serializer: Object providing ``dumps``/``loads``; defaults to ``json``.
        :raises ValueError: if ``repo_path`` does not exist.
        """
        self.id = id
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer
        self.lock = threading.RLock()

    def gc(self):
        """Run ``git gc --auto`` in the repository (when git is found) and re-open it."""
        with self.lock:
            if which('git'):
                repo_dir = self.repo.path
                try:
                    log.info("starting gc on repo %s" % repo_dir)
                    subprocess.check_call("git gc --auto", cwd=repo_dir,
                                          shell=True)
                    log.info("finished gc on repo %s" % repo_dir)
                    self.repo = Repo(self.repo.path)
                except subprocess.CalledProcessError:
                    log.exception("git gc failed for repo %s" % repo_dir)

    def create_branch(self, branch, parent=None):
        """Create ``branch`` at ``parent`` (master's head by default) if it does not exist yet.

        :return: ``{'sha': <head of the branch>}``
        """
        with self.lock:
            if not parent:
                parent = self.branch_head('master')
            branch_ref = self._branch_ref_name(branch)
            self.repo.refs.add_if_new(branch_ref, parent)
            return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None,
              committer=None):
        """Apply the tree changes of ``source_branch`` onto ``target_branch`` and commit.

        Added and modified entries are written, deleted entries are removed;
        copy, rename and unchanged entries are ignored.

        :return: ``{'sha': <new commit sha>}``
        :raises ValueError: if both branch names are equal.
        """
        with self.lock:
            if source_branch == target_branch:
                raise ValueError("Cannot merge branch with itself %s" %
                                 source_branch)
            target_tree = self._get_object(ROOT_PATH, target_branch)
            branch_tree = self._get_object(ROOT_PATH, source_branch)
            for tc in diff_tree.tree_changes(self.repo.object_store,
                                             target_tree.id, branch_tree.id):
                if tc.type in (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY):
                    self._add_tree(target_tree,
                                   ((tc.new.path, tc.new.sha, tc.new.mode), ))
                elif tc.type == diff_tree.CHANGE_DELETE:
                    target_tree = self._delete(tc.old.path, target_branch)
                # CHANGE_COPY, CHANGE_RENAME and CHANGE_UNCHANGED: nothing to do
            msg = "Merge %s to %s" % (source_branch, target_branch)
            merge_heads = [self.branch_head(source_branch)]
            sha = self.repo.do_commit(tree=target_tree.id,
                                      message=msg,
                                      ref=self._branch_ref_name(target_branch),
                                      merge_heads=merge_heads,
                                      author=author,
                                      committer=committer)
            return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """ Get a tree or blob from the store by key.

        The key param can be paths such as 'a/b/c'. A key that points to a
        Blob returns the deserialized blob data; a key that points to a Tree
        returns a python dict that also carries a 'commit_sha' entry.

        :param key: The key to retrieve from the store
        :param shallow: Limit the returned dict to one level below ``key``.
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; defaults to the branch head.
        :return: A dict or deserialized value, or None when nothing (truthy)
            is stored under the key.
        """
        if not commit_sha:
            commit_sha = self.branch_head(branch)
        obj = self._get_object(key, branch, commit_sha)
        if obj:
            if isinstance(obj, Blob):
                return self.serializer.loads(obj.data)
            elif isinstance(obj, Tree):
                keys = key.split('/')
                min_level = len(filter(None, keys))
                if shallow:
                    max_level = min_level + 1
                else:
                    max_level = sys.maxint
                tree = self.trees(key, min_level=min_level,
                                  max_level=max_level, branch=branch,
                                  commit_sha=commit_sha)
                if keys != [ROOT_PATH]:
                    # descend to the requested sub-dict
                    for k in keys:
                        tree = tree[k]
                tree['commit_sha'] = commit_sha
                return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None,
                    bypass_head_cache=True):
        """Return the git object at ``key``, or None when the lookup fails.

        ``commit_sha`` defaults to the head of ``branch``;
        ``bypass_head_cache`` is accepted but not used.
        """
        try:
            if not commit_sha:
                commit_sha = self.branch_head(branch)
            (mode, sha) = tree_lookup_path(self.repo.get_object,
                                           self._repo_tree(commit_sha), key)
            return self.repo[sha]
        except KeyError:
            return None
        except NotTreeError:
            return None

    def diff(self, old_sha, new_sha=None):
        """Show the changes between OLD_SHA and NEW_SHA.

        If NEW_SHA is not set, it will default to the head of master.

        :param old_sha: parent commit's sha
        :param new_sha: another sha, defaults to the head of master
        :retval: dict mapping change type to a list of entry lists; deletions
            are reported as ``[(path, None)]``.
        """
        orig = self._get_object(ROOT_PATH, commit_sha=old_sha)
        # a falsy commit_sha makes _get_object fall back to the branch head
        new = self._get_object(ROOT_PATH, commit_sha=new_sha)
        out = defaultdict(list)
        for change_tree in diff_tree.tree_changes(self.repo.object_store,
                                                  orig.id, new.id,
                                                  want_unchanged=False):
            if change_tree.type.lower() == "delete" and change_tree.old.path:
                # if the change was a delete, we have no tree or blob to yield
                # so return key with no value, in the same type of structure
                # for consistency
                out[change_tree.type].append([(change_tree.old.path, None)])
            else:
                # Read the entries from the commit being compared, not from
                # whatever master currently points at.
                out[change_tree.type].append(
                    filter(None, self.entries(change_tree.new.path,
                                              commit_sha=new_sha)))
        return out

    def put(self, key, value, flatten_keys=True, branch='master', author=None,
            committer=None, overwrite=False):
        """ Add/Update one or many key value pairs in the store.

        When ``value`` is a dict and ``flatten_keys`` is set, it is flattened
        into one entry per nested key path.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Flatten ``{key: value}`` before storing.
        :param branch: Branch to commit to; the tree is taken from master when
            the branch has none.
        :param overwrite: For dict values, delete existing entries under
            ``key`` that are absent from ``value``.
        :return: ``{'sha': <new commit sha>}``
        """
        with self.lock:
            e = {key: value}
            if flatten_keys:
                e = flatten(e)
            root_tree = self._get_object(ROOT_PATH, branch)
            merge_heads = []
            if not root_tree:
                root_tree = self._get_object(ROOT_PATH)
                merge_heads = [self.branch_head('master')]
            blobs = []
            msg = ''
            existing_obj = None
            if type(value) == types.DictType:
                try:
                    existing_obj = self.get(key, shallow=True, branch=branch)
                except Exception:
                    # best effort: treat any lookup failure as "nothing stored"
                    pass
                if existing_obj:
                    if 'commit_sha' in existing_obj:
                        del existing_obj['commit_sha']
                    existing_obj = flatten({key: existing_obj})
            for (k, v) in e.iteritems():
                blob = Blob.from_string(self.serializer.dumps(v))
                self.repo.object_store.add_object(blob)
                blobs.append((k, blob.id, stat.S_IFREG))
                if existing_obj and k in existing_obj:
                    if existing_obj[k] != v:
                        msg += "Put %s\n" % k
                    # whatever remains in existing_obj afterwards was not
                    # part of this put
                    del existing_obj[k]
                else:
                    msg += "Put %s\n" % k
            if overwrite and existing_obj:
                for k in existing_obj:
                    self.delete(k, branch=branch)
                root_tree = self._get_object(ROOT_PATH, branch)
            root_id = self._add_tree(root_tree, blobs)
            sha = self.repo.do_commit(tree=root_id,
                                      message=msg,
                                      ref=self._branch_ref_name(branch),
                                      merge_heads=merge_heads,
                                      author=author,
                                      committer=committer)
            return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """ Delete one or more entries from the store.

        The key param can refer to either a Tree or Blob in the store. If it
        refers to a Blob, then just that entry will be removed. If it refers
        to a Tree, then that entire subtree will be removed.

        :param key: The key to remove from the store.
        :return: ``{'sha': <new commit sha>}``
        """
        with self.lock:
            tree = self._get_object(key, branch)
            merge_heads = []
            delete_branch = branch
            if not tree:
                # nothing found on the branch: delete from master's tree and
                # record master as a merge head
                merge_heads = [self.branch_head('master')]
                delete_branch = 'master'
            root = self._delete(key, delete_branch)
            sha = self.repo.do_commit(tree=root.id,
                                      message="Delete %s" % key,
                                      ref=self._branch_ref_name(branch),
                                      merge_heads=merge_heads,
                                      author=author,
                                      committer=committer)
            return {'sha': sha}

    def _delete(self, key, branch='master'):
        """Remove ``key`` from the branch's tree and return the updated root tree.

        Every tree on the path to the key is re-added to the object store.
        """
        trees = {}
        path = key
        if path:
            # load every ancestor tree of the key, up to the root
            while path:
                (path, name) = pathsplit(path)
                trees[path] = self._get_object(path, branch)
        else:
            trees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        (path, name) = pathsplit(key)
        if name:
            del trees[path][name]
        else:
            for entry in trees[path].iteritems():
                del trees[path][entry.path]
        # propagate the changed tree ids upwards
        while path:
            (parent_path, name) = pathsplit(path)
            trees[parent_path].add(name, stat.S_IFDIR, trees[path].id)
            self.repo.object_store.add_object(trees[path])
            path = parent_path
        self.repo.object_store.add_object(trees[ROOT_PATH])
        return trees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the tree sha of the commit ``commit_sha``."""
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, min_level=None,
             max_level=None, depth_first=True, filter_by=None, branch='master',
             commit_sha=None):
        """ Returns a list of keys from the store.

        The path param can be used to scope the request to return keys from a
        subset of the tree. The filter_by param can be used to control whether
        to return keys for Blob nodes, Tree nodes or all nodes.

        :param path: The starting point retrieve key paths from.
        :param filter_by: Either 'blob', 'tree' or None. Controls what type of
            node key paths to return. Default is None which returns all node
            type key paths
        :param branch: The branch name to return key paths for.
        :return: A list of keys, in traversal order.
        """
        if filter_by == 'blob':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Blob)
        elif filter_by == 'tree':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Tree)
        else:
            filter_fn = None
        return map(
            lambda x: x[0],
            filter(
                filter_fn,
                self.iteritems(path, pattern, min_level, max_level,
                               depth_first, branch, commit_sha)))

    def entries(self, path=ROOT_PATH, pattern=None, min_level=None,
                max_level=None, depth_first=True, branch='master',
                commit_sha=None):
        """Yield ``(key, deserialized value)`` for every Blob that ``iteritems`` produces."""
        for key, obj in self.iteritems(path, pattern, min_level, max_level,
                                       depth_first, branch, commit_sha):
            if isinstance(obj, Blob):
                yield (key, self.serializer.loads(str(obj.data)))

    def iteritems(self, path=ROOT_PATH, pattern=None, min_level=None,
                  max_level=None, depth_first=True, branch='master',
                  commit_sha=None):
        """Traverse the store from ``path`` and yield ``(key, git_object)`` pairs.

        Only nodes whose level (number of path components) satisfies
        ``min_level < level <= max_level`` are yielded, and only when
        ``pattern`` (if given) matches the key.
        """
        try:
            import gevent
        except ImportError:
            # gevent is optional; without it the traversal simply never yields
            # control
            gevent = None

        def _node(level, path, node):
            return level, path, node

        root = self._get_object(path, branch=branch, commit_sha=commit_sha)
        bypass_head_cache = False
        level = len(filter(None, path.split('/')))
        if min_level is None:
            min_level = 0
        if max_level is None:
            max_level = sys.maxint
        nodes_to_visit = collections.deque([_node(level, path, root)])
        while len(nodes_to_visit) > 0:
            # allow server to yield to other greenlets during long tree
            # traversals
            if gevent:
                gevent.sleep(0)
            (level, path, node) = nodes_to_visit.popleft()
            if isinstance(node, Tree):
                # The flag is passed by keyword: positionally it would land in
                # _tree_entry's `branch` parameter.
                candidates = [
                    _node(level + 1,
                          *self._tree_entry(
                              path, child,
                              bypass_head_cache=bypass_head_cache))
                    for child in node.iteritems()
                ]
                children = [c for c in candidates
                            if min_level < c[0] <= max_level]
                if depth_first:
                    nodes_to_visit.extendleft(children)
                else:
                    nodes_to_visit.extend(children)
            if min_level < level <= max_level:
                if pattern is not None:
                    if pattern.match(path):
                        yield (path, node)
                else:
                    yield (path, node)

    def trees(self, path=ROOT_PATH, pattern=None, min_level=None,
              max_level=None, depth_first=True, object_depth=None,
              branch='master', commit_sha=None):
        """ Returns a python dict representation of the store.

        The dict is built by expanding every ``(key, value)`` pair produced by
        ``entries`` with ``expand_tree``.

        :param path: Optional string key to begin building the dict from.
        :param pattern: Regex pattern to filter matching tree paths.
        :param branch: Optional git branch name to return key paths from.
        :param commit_sha: Optional commit to read from instead of the branch
            head.
        :return: A dict represents a section of the store.
        """
        tree = {}
        for key, value in self.entries(path, pattern, min_level, max_level,
                                       depth_first, branch, commit_sha):
            expand_tree(key, value, tree, object_depth)
        return tree

    def _head_cache_key(self, key):
        """Return ``key`` prefixed with this store's id."""
        return "%s/%s" % (self.id, key)

    def _tree_entry(self, path, tree_entry, branch='master',
                    bypass_head_cache=False):
        """Return ``(child key, git object)`` for ``tree_entry`` found under ``path``.

        ``branch`` and ``bypass_head_cache`` are accepted but not used.
        """
        child_path = self._tree_entry_key(path, tree_entry)
        obj = self.repo[tree_entry.sha]
        return child_path, obj

    def _tree_entry_key(self, path, tree_entry):
        """Join ``path`` and the entry's own path with '/'; an empty ``path`` adds no prefix."""
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        """Return ``name`` as a full ``refs/heads/...`` ref name."""
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the sha that the branch ``name`` points to."""
        with self.lock:
            return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs, branch='master', commit_sha=None):
        """Commit a new tree.

        :param root_tree: Root tree to add trees to
        :param blobs: Iterable over blob path, sha, mode entries
        :param branch: Branch used to look up existing sub-trees.
        :param commit_sha: Commit used to look up existing sub-trees.
        :return: SHA1 of the created tree.
        """
        # nested dicts mirroring the directory layout of the blobs
        trees = {"": {}}

        def add_tree(path):
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            if path:
                tree = self._get_object(path, branch=branch,
                                        commit_sha=commit_sha)
                if not tree:
                    tree = Tree()
                if not isinstance(tree, Tree):
                    # a non-tree object occupies the path: remove it first
                    self.delete(path, branch=branch)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if type(entry) == dict:
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return build_tree("")
class Store(object):
    """ A simple key/value store using git as the backing store. """

    def __init__(self, repo_path, serializer=None):
        """
        :param repo_path: Path of an existing git repository.
        :param serializer: Object providing ``dumps``/``loads``; defaults to ``json``.
        :raises ValueError: if ``repo_path`` does not exist.
        """
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer

    def create_branch(self, branch, parent=None):
        """Create ``branch`` at ``parent`` (master's head by default) if it does not exist yet.

        :return: ``{'sha': <head of the branch>}``
        """
        if not parent:
            parent = self.branch_head('master')
        branch_ref = self._branch_ref_name(branch)
        self.repo.refs.add_if_new(branch_ref, parent)
        return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None,
              committer=None):
        """Apply the tree changes of ``source_branch`` onto ``target_branch`` and commit.

        Added and modified entries are written, deleted entries are removed;
        copy, rename and unchanged entries are ignored.

        :return: ``{'sha': <new commit sha>}``
        :raises ValueError: if both branch names are equal.
        """
        if source_branch == target_branch:
            raise ValueError("Cannot merge branch with itself %s" %
                             source_branch)
        target_tree = self._get_object(ROOT_PATH, target_branch)
        branch_tree = self._get_object(ROOT_PATH, source_branch)
        for tc in diff_tree.tree_changes(self.repo.object_store,
                                         target_tree.id, branch_tree.id):
            if tc.type in (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY):
                self._add_tree(target_tree,
                               ((tc.new.path, tc.new.sha, tc.new.mode),))
            elif tc.type == diff_tree.CHANGE_DELETE:
                target_tree = self._delete(tc.old.path, target_branch)
            # CHANGE_COPY, CHANGE_RENAME and CHANGE_UNCHANGED: nothing to do
        msg = "Merge %s to %s" % (source_branch, target_branch)
        merge_heads = [self.branch_head(source_branch)]
        sha = self.repo.do_commit(
            tree=target_tree.id,
            message=msg,
            ref=self._branch_ref_name(target_branch),
            merge_heads=merge_heads,
            author=author,
            committer=committer
        )
        return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """ Get a tree or blob from the store by key.

        The key param can be paths such as 'a/b/c'. A key that points to a
        Blob returns the deserialized blob data; a key that points to a Tree
        returns a python dict.

        :param key: The key to retrieve from the store
        :param shallow: Limit the traversal depth to the number of key parts.
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; defaults to the branch head.
        :return: A dict or deserialized value, or None when nothing (truthy)
            is stored under the key.
        """
        obj = self._get_object(key, branch, commit_sha)
        if obj:
            if isinstance(obj, Blob):
                return self.serializer.loads(obj.data)
            elif isinstance(obj, Tree):
                keys = key.split('/')
                depth = None
                if shallow:
                    depth = len(keys)
                # Forward commit_sha so the dict is built from the same commit
                # the object was resolved in, not from the branch head.
                tree = self.trees(key, depth=depth, branch=branch,
                                  commit_sha=commit_sha)
                if keys != [ROOT_PATH]:
                    # descend to the requested sub-dict
                    for k in keys:
                        tree = tree[k]
                return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None):
        """Return the git object at ``key``, or None on ``KeyError``.

        ``commit_sha`` defaults to the head of ``branch``.
        """
        try:
            if not commit_sha:
                commit_sha = self.branch_head(branch)
            (mode, sha) = tree_lookup_path(self.repo.get_object,
                                           self._repo_tree(commit_sha), key)
            return self.repo[sha]
        except KeyError:
            return None

    def put(self, key, value, flatten_keys=True, branch='master', author=None,
            committer=None):
        """ Add/Update one or many key value pairs in the store.

        When ``flatten_keys`` is set, ``{key: value}`` is flattened into one
        entry per nested key path before storing.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param branch: Branch to commit to; the tree is taken from master when
            the branch has none.
        :return: ``{'sha': <new commit sha>}``
        """
        e = {key: value}
        if flatten_keys:
            e = flatten(e)
        root_tree = self._get_object(ROOT_PATH, branch)
        merge_heads = []
        if not root_tree:
            root_tree = self._get_object(ROOT_PATH)
            merge_heads = [self.branch_head('master')]
        blobs = []
        msg = ''
        # distinct loop names: do not rebind the key/value parameters
        for (entry_key, entry_value) in e.iteritems():
            blob = Blob.from_string(self.serializer.dumps(entry_value))
            self.repo.object_store.add_object(blob)
            blobs.append((entry_key, blob.id, stat.S_IFREG))
            msg += "Put %s\n" % entry_key
        root_id = self._add_tree(root_tree, blobs)
        sha = self.repo.do_commit(
            tree=root_id,
            message=msg,
            ref=self._branch_ref_name(branch),
            merge_heads=merge_heads,
            author=author,
            committer=committer
        )
        return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """ Delete one or more entries from the store.

        The key param can refer to either a Tree or Blob in the store. If it
        refers to a Blob, then just that entry will be removed. If it refers
        to a Tree, then that entire subtree will be removed.

        :param key: The key to remove from the store.
        :return: ``{'sha': <new commit sha>}``
        """
        tree = self._get_object(key, branch)
        merge_heads = []
        delete_branch = branch
        if not tree:
            # nothing found on the branch: delete from master's tree and
            # record master as a merge head
            merge_heads = [self.branch_head('master')]
            delete_branch = 'master'
        root = self._delete(key, delete_branch)
        sha = self.repo.do_commit(
            tree=root.id,
            message="Delete %s" % key,
            ref=self._branch_ref_name(branch),
            merge_heads=merge_heads,
            author=author,
            committer=committer
        )
        return {'sha': sha}

    def _delete(self, key, branch='master'):
        """Remove ``key`` from the branch's tree and return the updated root tree.

        Every tree on the path to the key is re-added to the object store.
        """
        trees = {}
        path = key
        if path:
            # load every ancestor tree of the key, up to the root
            while path:
                (path, name) = pathsplit(path)
                trees[path] = self._get_object(path, branch)
        else:
            trees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        (path, name) = pathsplit(key)
        if name:
            del trees[path][name]
        else:
            for entry in trees[path].iteritems():
                del trees[path][entry.path]
        # propagate the changed tree ids upwards
        while path:
            (parent_path, name) = pathsplit(path)
            trees[parent_path].add(name, stat.S_IFDIR, trees[path].id)
            self.repo.object_store.add_object(trees[path])
            path = parent_path
        self.repo.object_store.add_object(trees[ROOT_PATH])
        return trees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the tree sha of the commit ``commit_sha``."""
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, depth=None, filter_by=None,
             branch='master', commit_sha=None):
        """ Returns a list of keys from the store.

        The path param can be used to scope the request to return keys from a
        subset of the tree. The filter_by param can be used to control whether
        to return keys for Blob nodes, Tree nodes or all nodes.

        :param path: The starting point retrieve key paths from.
        :param filter_by: Either 'blob', 'tree' or None. Controls what type of
            node key paths to return. Default is None which returns all node
            type key paths
        :param branch: The branch name to return key paths for.
        :return: A list of keys, in traversal order.
        """
        if filter_by == 'blob':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Blob)
        elif filter_by == 'tree':
            filter_fn = lambda tree_entry: isinstance(tree_entry[1], Tree)
        else:
            filter_fn = None
        return map(lambda x: x[0],
                   filter(filter_fn,
                          self.raw_entries(path, pattern, depth, branch,
                                           commit_sha)))

    def entries(self, path=ROOT_PATH, pattern=None, depth=None,
                branch='master', commit_sha=None):
        """Yield ``(key, deserialized value)`` for every Blob that ``raw_entries`` produces."""
        for key, obj in self.raw_entries(path, pattern, depth, branch,
                                         commit_sha):
            if isinstance(obj, Blob):
                yield (key, self.serializer.loads(str(obj.data)))

    def raw_entries(self, path=ROOT_PATH, pattern=None, depth=None,
                    branch='master', commit_sha=None):
        """ Returns a generator that traverses the tree and produces entries
        of the form (tree_path, git_object), where tree_path is a string
        representing a key into the store and git_object is either a git Blob
        or Tree object.

        :param path: String key to begin producing result entries from.
        :param pattern: Regex pattern to filter matching tree paths; falls
            back to ``MATCH_ALL`` when not given.
        :param depth: Specifies how deep to recurse when producing results.
            Default is None which does full tree traversal.
        :param branch: Git branch name to return key paths for.
        :return: A generator that produces entries of the form
            (tree_path, git_object)
        :raises ValueError: if ``path`` does not point to a Tree.
        """
        tree = self._get_object(path, branch, commit_sha)
        if not isinstance(tree, Tree):
            raise ValueError("Path %s is not a tree!" % path)
        if not pattern:
            pattern = MATCH_ALL
        return self._entries(path, tree, pattern, depth)

    def _entries(self, path, tree, pattern, depth=None):
        """Recursively yield ``(key, git_object)`` below ``tree`` for keys matching ``pattern``.

        A falsy ``depth`` recurses without limit; otherwise recursion stops
        once ``depth`` reaches 1.
        """
        for tree_entry in tree.iteritems():
            obj = self.repo[tree_entry.sha]
            key = self._tree_entry_key(path, tree_entry)
            if pattern.match(key):
                yield (key, obj)
            if isinstance(obj, Tree):
                if not depth:
                    for te in self._entries(key, obj, pattern, depth):
                        yield te
                elif depth > 1:
                    for te in self._entries(key, obj, pattern, depth - 1):
                        yield te

    def trees(self, path=ROOT_PATH, pattern=None, depth=None,
              object_depth=None, branch='master', commit_sha=None):
        """ Returns a python dict representation of the store.

        The dict is built by expanding every ``(key, value)`` pair produced by
        ``entries`` with ``expand_tree``.

        :param path: Optional string key to begin building the dict from.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: Specifies how deep to recurse when producing results.
            Default is None which does full tree traversal.
        :param branch: Optional git branch name to return key paths from.
        :param commit_sha: Optional commit to read from instead of the branch
            head.
        :return: A dict represents a section of the store.
        """
        tree = {}
        # distinct loop name: do not rebind the path parameter
        for entry_path, value in self.entries(path, pattern, depth, branch,
                                              commit_sha):
            expand_tree(entry_path, value, tree, object_depth)
        return tree

    def _tree_entry_key(self, path, tree_entry):
        """Join ``path`` and the entry's own path with '/'; an empty ``path`` adds no prefix."""
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        """Return ``name`` as a full ``refs/heads/...`` ref name."""
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the sha that the branch ``name`` points to."""
        return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs):
        """Commit a new tree.

        :param root_tree: Root tree to add trees to
        :param blobs: Iterable over blob path, sha, mode entries
        :return: SHA1 of the created tree.
        """
        # nested dicts mirroring the directory layout of the blobs
        trees = {"": {}}

        def add_tree(path):
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            if path:
                tree = self._get_object(path)
                if not tree:
                    tree = Tree()
                if not isinstance(tree, Tree):
                    # a non-tree object occupies the path: remove it first
                    self.delete(path)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if type(entry) == dict:
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return build_tree("")
class GitStorage():
    """Keep uploaded files under version control in a dulwich repository.

    The repository lives at the ``path`` given to the constructor; it is
    created (with an initial empty commit) when the directory does not exist.
    """

    def _ignoreFile(self, dirName, fileName):
        """
        Callback passed as ``ignore=`` to ``shutil.copytree``.

        ``dirName`` the working directory

        ``fileName`` list of files inside the directory (dirName)

        Returns ``dirName + name`` for every name that is not listed in the
        module-level ``fileToIgnore``.
        """
        # NOTE(review): shutil.copytree expects the callback to return bare
        # entry names to skip; this returns concatenated paths instead.
        # Left unchanged because the intended filter is not clear from here.
        return [dirName + name for name in fileName
                if dirName + name not in fileToIgnore]

    def _commit(self, tree):
        """
        Commit a tree and point HEAD at the new commit; used only by the
        constructor.

        ``tree`` tree to commit

        Returns the id of the created commit.
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (
            pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('commiting')
        return commit.id

    def __init__(self, path):
        """
        Open the repository at ``path``, creating it if the directory is
        missing.

        ``path`` directory holding (or that will hold) the repository
        """
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    # only this function will be used on upload
    def change(self, files):
        """
        Stage the given files and record them in a single commit; can be
        called for the first upload as well as for later ones.

        ``files`` a list of files to change
        """
        if len(files) == 0:
            log.debug("trying to change nothing will do... nothing")
            return
        log.debug("this will change %i files" % (len(files)))
        # Repo.stage takes an iterable of paths: handing it one bare string
        # makes it iterate over the characters of that string.
        self.repo.stage([str(f) for f in files])
        log.debug("stages dones")
        self.repo.do_commit(
            "this is so awesome that nobody will never see it",
            committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """
        Create files from the diff between 2 trees; meant for the code
        browser to get older versions (walking on history).

        ``tree`` the tree that you want to compare to; defaults to the tree
            of the first parent of HEAD

        ``dest`` the destination folder to build sources, appended to the
            repository location

        ``originalTree`` the original tree; defaults to the tree of HEAD
        """
        if tree is None:
            head = self.repo.commit(
                self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(
                self.repo.commit(self.repo.head()).tree)

        # collect the entries of ``tree`` that are absent from originalTree
        blobToBuild = []
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)

        # The location is parsed out of str(self.repo), between the quotes.
        repoLocation = os.path.join(str(self.repo).split("'")[1])
        target = repoLocation + dest

        # creating the folder with link to older files
        if os.path.exists(target):
            # supply the argument the "%s" placeholder was missing
            log.warning("%s already exist, copy will not work", target)
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation, target, symlinks=True,
                            ignore=self._ignoreFile)

        for b in blobToBuild:
            # os.path.split returns a tuple (no .pop()); the directory part
            # is all that is needed here.
            fileDirectory = os.path.join(target, os.path.dirname(b.path))
            if not os.path.exists(fileDirectory):
                os.makedirs(fileDirectory)
            # binary mode: blob contents are raw bytes and must not go
            # through newline translation; "with" closes the handle even if
            # the write fails
            with open(os.path.join(target, b.path), 'wb') as out:
                out.write(self.repo.get_object(b.sha).as_raw_string())

    # get*
    def getLastTree(self):
        """
        return the last tree
        """
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """
        return the ``tree`` attribute of every parent of the HEAD commit
        """
        commit = self.repo._commit(self.repo.head())
        return [c.tree for c in commit._get_parents()]

    def getOlderFileContent(self, file):
        """
        return the first file's content that changed from the file, or an
        empty string when no older version is found

        ``file`` the file to work on
        """
        with open(file) as f:
            originalBlob = Blob.from_string(f.read())

        # Bind ``tree`` up front: without this the "is None" check below
        # raised UnboundLocalError whenever no tree matched.
        tree = None
        for t in self.getAllTrees():
            # parsing tree in order to find the tree where the file change
            if originalBlob not in t:
                tree = t
                break

        # tree must be existent, other way file is not correct
        if tree is None:
            log.error(
                "there is no tree that contain this blob this souldn't happen, other way this file does not appear to come from this package"
            )
            return ""

        # Default to "nothing older": keeps olderTree bound when no parent
        # matches, so the function falls through to the empty result.
        olderTree = tree
        if self.repo._commit(self.repo.head()).tree == tree:
            olderTree = self.repo.commit(
                self.repo.head())._get_parents()[0].tree
        else:
            for c in self.repo._commit(self.repo.head())._get_parents():
                if c.tree == tree:
                    try:
                        olderTree = c.get_parents()[0]
                    except IndexError:
                        log.debug("file is the last version")
                        olderTree = tree

        if olderTree != tree:
            # we must check here the blob that contains the older file
            for b in self.repo.object_store.iter_tree_contents(olderTree.id):
                if originalBlob.path == b.path:
                    # older blob found; the first loop already checked
                    # whether they are the same, so the content can be
                    # returned directly
                    return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """
        return the parents of the HEAD commit
        """
        return self.repo.commit(self.repo.head())._get_parents()
class GitUiOpts:
    """Interactive helper around a dulwich repository: list, stage, commit
    and push files, prompting for credentials when a push fails."""

    def __init__(self, path, remote_target="origin"):
        """Open the repository at ``path`` and read the URL of the remote.

        :param path: path of the repository to open.
        :param remote_target: name of the remote whose ``url`` is read from
            the repository configuration.
        """
        self.committer = None
        self.repo_path = path
        self.repo = Repo(self.repo_path)
        self.unstaged = []
        self.staged = []
        self.remote_url = self.repo.get_config().get(
            ('remote', remote_target), 'url').decode()
        self.remote_url_credentials = None

    def get_unstaged(self):
        """Refresh ``self.unstaged`` with the untracked and unstaged paths
        reported by ``porcelain.status``; bytes paths are decoded to text."""
        status = porcelain.status(self.repo.path)
        # Start from a clean list so repeated calls do not pile up duplicates.
        self.unstaged = []
        # Plain list concatenation: numpy would coerce the two lists to a
        # common dtype, which is not wanted for a list of paths.
        for entry in list(status.untracked) + list(status.unstaged):
            try:
                entry = entry.decode()
            except (AttributeError, UnicodeDecodeError):
                # already text, or not decodable: keep the entry as it is
                pass
            self.unstaged.append(entry)

    def get_staged(self):
        """Refresh ``self.staged`` with one ``{"type", "path"}`` dict per
        staged deletion, addition and modification."""
        staged = porcelain.status(self.repo.path).staged
        # Start from a clean list so repeated calls do not pile up duplicates.
        self.staged = []
        for type_file in ['delete', 'add', 'modify']:
            for filepath in staged[type_file]:
                self.staged.append({
                    "type": type_file,
                    "path": filepath.decode()
                })

    def stage_file(self, filepath):
        """Stage ``filepath`` if it is listed in ``self.unstaged``."""
        if filepath in self.unstaged:
            self.repo.stage([filepath])

    def commit_all_files(self, commit_title):
        """Create one commit titled ``commit_title`` and print the title.

        ``self.committer`` must have been set to a string beforehand.
        """
        self.repo.do_commit(commit_title.encode(),
                            committer=self.committer.encode())
        print(commit_title)

    def commit_file(self):
        """Create one commit per staged entry, titled ``"<type> <file name>"``,
        printing each title."""
        self.get_staged()
        for file_to_commit in self.staged:
            commit_title = '{} {}'.format(
                file_to_commit['type'],
                file_to_commit['path'].split('/')[-1])
            self.repo.do_commit(commit_title.encode(),
                                committer=self.committer.encode())
            print(commit_title)

    def push_once(self):
        """Push ``master`` to the remote (using the credentials URL when one
        has been set) and clear ``self.staged``."""
        if self.remote_url_credentials is None:
            remote_url = self.remote_url
        else:
            remote_url = self.remote_url_credentials
        porcelain.push(self.repo, remote_location=remote_url,
                       refspecs="master")
        self.staged = []

    def push(self):
        """Push; on failure ask for a username and password and retry once.

        An error raised by the second attempt propagates to the caller.
        """
        try:
            self.push_once()
        except Exception:
            # Any push failure is treated as "credentials needed".
            username = self.simple_input(content="Username : ")
            password = self.simple_input(content="Password : ")
            # Insert "user:password@" right after the scheme separator only.
            self.remote_url_credentials = "//{0}:{1}@".format(
                username, password).join(self.remote_url.split('//', 1))
            self.push_once()

    def get_committer(self):
        """Return ``(user.name, user.email)`` as reported by
        ``git config --list``; missing values are empty strings."""
        username, mail = "", ""
        result = subprocess.run(["git", "config", "--list"],
                                stdout=subprocess.PIPE)
        for row in result.stdout.decode().split("\n"):
            # Split on the first "=" only, so values containing "=" are kept.
            row_key, separator, row_value = row.partition("=")
            if not separator:
                continue
            if row_key == "user.name":
                username = row_value
            elif row_key == "user.email":
                mail = row_value
        return username, mail

    def simple_input(self, content):
        """Print ``content`` and return one line read from standard input."""
        print(content)
        return input()

    def select_input(self, keyword, message, choices):
        """Show an ``inquirer`` list prompt and return the selected choice.

        :param keyword: key under which the answer is stored and looked up.
        :param message: text shown with the prompt.
        :param choices: options offered to the user.
        """
        questions = [
            inquirer.List(
                keyword,
                message=message,
                choices=choices,
            ),
        ]
        answers = inquirer.prompt(questions)
        return answers[keyword]
class Gittle(object):
    """High-level wrapper around a dulwich repository.

    All paths used in Gittle external methods must be paths relative to the
    git repository
    """

    DEFAULT_COMMIT = "HEAD"
    DEFAULT_BRANCH = "master"
    DEFAULT_REMOTE = "origin"
    DEFAULT_MESSAGE = "**No Message**"
    DEFAULT_USER_INFO = {"name": None, "email": None}

    DIFF_FUNCTIONS = {
        "classic": utils.git.classic_tree_diff,
        "dict": utils.git.dict_tree_diff,
        "changes": utils.git.dict_tree_diff,
    }
    DEFAULT_DIFF_TYPE = "dict"

    HIDDEN_REGEXES = [
        # Hide git directory
        r".*\/\.git\/.*"
    ]

    # References
    REFS_BRANCHES = "refs/heads/"
    REFS_REMOTES = "refs/remotes/"
    REFS_TAGS = "refs/tags/"

    # Name pattern truths
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    # Used to tell if a tree entry is a directory.
    # Written with the "0o" prefix, which both Python 2.6+ and 3 accept.
    MODE_DIRECTORY = 0o040000

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None,
                 report_activity=None, *args, **kwargs):
        """Wrap an existing repository.

        :param repo_or_path: a dulwich repo, another Gittle, or a path string.
        :param origin_uri: default remote URL for push/pull/fetch.
        :param auth: authenticator to use; when falsy one is built from the
            remaining ``*args``/``**kwargs`` via :meth:`auth`.
        :param report_activity: optional callback forwarded to the client.
        :raises Exception: if ``repo_or_path`` is of an unsupported type.
        """
        if isinstance(repo_or_path, DulwichRepo):
            self.repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            self.repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            path = os.path.abspath(repo_or_path)
            self.repo = DulwichRepo(path)
        else:
            logging.warning("Repo is of type %s" % type(repo_or_path))
            raise Exception("Gittle must be initialized with either a dulwich repository or a string to the path")

        # Set path
        self.path = self.repo.path

        # The remote url
        self.origin_uri = origin_uri

        # Report client activty
        self._report_activity = report_activity

        # Build ignore filter
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Get authenticator
        if auth:
            self.authenticator = auth
        else:
            self.auth(*args, **kwargs)

    def report_activity(self, *args, **kwargs):
        """Forward to the activity callback, if one was supplied."""
        if not self._report_activity:
            return
        return self._report_activity(*args, **kwargs)

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        """Return ``"name <email>"`` or None when either part is missing."""
        name = userinfo.get("name")
        email = userinfo.get("email")
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return "".join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """Return SHA of the current HEAD
        """
        return self.repo.head()

    @property
    def is_bare(self):
        """Bare repositories have no working directories or indexes
        """
        return self.repo.bare

    @property
    def is_working(self):
        return not (self.is_bare)

    def has_index(self):
        """Opposite of is_bare
        """
        return self.repo.has_index()

    @property
    def has_commits(self):
        """
        If the repository has no HEAD we consider that is has no commits
        """
        try:
            self.repo.head()
        except KeyError:
            return False
        return True

    def ref_walker(self, ref=None):
        """
        Very simple, basic walker
        """
        ref = ref or "HEAD"
        sha = self._commit_sha(ref)
        return self.repo.revision_history(sha)

    def branch_walker(self, branch):
        branch = branch or self.DEFAULT_BRANCH
        ref = self._format_ref_branch(branch)
        return self.ref_walker(ref)

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list of commits with all their attached information.

        :param start: index of the first commit to return.
        :param end: index one past the last commit; falsy means "to the end".
        :param branch: branch to walk; defaults to DEFAULT_BRANCH.
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        # Honour ``start`` even when no ``end`` is given (it used to be
        # ignored in that case).
        return commits[start:end or None]

    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        n = n or 10
        return funky.pluck(self.commit_info(end=n, branch=branch), "author")

    @property
    def commit_count(self):
        try:
            return len(self.ref_walker())
        except KeyError:
            return 0

    def commits(self):
        """Return a list of SHAs for all the concerned commits
        """
        return [commit["sha"] for commit in self.commit_info()]

    @property
    def git_dir(self):
        return self.repo.controldir()

    def auth(self, *args, **kwargs):
        """Build, store and return a GittleAuth from the given arguments."""
        self.authenticator = GittleAuth(*args, **kwargs)
        return self.authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.DEFAULT_BRANCH
        sha = self.branches[branch_name]

        def wants_func(old):
            refs_key = self._format_ref_branch(branch_name)
            return {refs_key: sha}

        return wants_func

    def _get_ignore_regexes(self):
        """Return regexes built from the repository's .gitignore, if any."""
        gitignore_filename = os.path.join(self.path, ".gitignore")
        if not os.path.exists(gitignore_filename):
            return []
        # "with" so the handle is closed instead of leaked
        with open(gitignore_filename) as gitignore:
            globers = [line.rstrip() for line in gitignore]
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        return os.path.abspath(os.path.join(self.path, repo_file))

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        return os.path.relpath(abspath, self.path)

    @property
    def last_commit(self):
        return self[self.repo.head()]

    @property
    def index(self):
        # Every access opens the index anew via the dulwich repo.
        return self.repo.open_index()

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository"""
        mkdir_safe(path)

        # Constructor to use
        if bare:
            constructor = DulwichRepo.init_bare
        else:
            constructor = DulwichRepo.init

        # Create dulwich repo
        repo = constructor(path)

        # Create Gittle repo
        return cls(repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        kwargs.setdefault("bare", True)
        return cls.init(*args, **kwargs)

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for the given or default remote.

        :raises InvalidRemoteUrl: when no remote URL is available.
        """
        # Get the remote URL
        origin_uri = origin_uri or self.origin_uri

        # Fail if inexistant
        if not origin_uri:
            raise InvalidRemoteUrl()

        client_kwargs = {}
        auth_kwargs = self.authenticator.kwargs()

        client_kwargs.update(auth_kwargs)
        client_kwargs.update(kwargs)
        client_kwargs.update({"report_activity": self.report_activity})

        client, remote_path = get_transport_and_path(origin_uri, **client_kwargs)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None, progress_stderr=None):
        selector = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri, progress_stderr=progress_stderr)
        return client.send_pack(remote_path, selector, self.repo.object_store.generate_pack_contents, progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None, progress_stderr=None):
        return self.push_to(origin_uri, branch_name, progress, progress_stderr)

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Delete the whole repository directory and fetch it again."""
        # Remove all previously existing data
        rmtree(self.path)
        mkdir_safe(self.path)
        self.repo = DulwichRepo.init(self.path)

        # Fetch brand new copy from remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        return self.fetch(origin_uri)

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        return self.pull_from(origin_uri, branch_name)

    def fetch_remote(self, origin_uri=None):
        # Get client
        client, remote_path = self.get_client(origin_uri=origin_uri)

        # Fetch data from remote repository
        remote_refs = client.fetch(remote_path, self.repo)

        return remote_refs

    def _setup_fetched_refs(self, refs, origin, bare):
        remote_tags = utils.git.subrefs(refs, "refs/tags")
        remote_heads = utils.git.subrefs(refs, "refs/heads")

        # Filter refs
        clean_remote_tags = utils.git.clean_refs(remote_tags)
        clean_remote_heads = utils.git.clean_refs(remote_heads)

        # Base of new refs
        heads_base = "refs/remotes/" + origin
        if bare:
            heads_base = "refs/heads"

        # Import branches
        self.import_refs(heads_base, clean_remote_heads)

        # Import tags
        self.import_refs("refs/tags", clean_remote_tags)

        # Update HEAD
        self["HEAD"] = refs["HEAD"]

    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote, import its refs, then check out the working
        tree (non-bare) or update server info (bare)."""
        bare = bare or False
        origin = origin or self.DEFAULT_REMOTE

        # Remote refs
        remote_refs = self.fetch_remote(origin_uri)

        # Update refs (branches, tags, HEAD)
        self._setup_fetched_refs(remote_refs, origin, bare)

        # Checkout working directories
        if not bare:
            self.checkout_all()
        else:
            self.update_server_info()

    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository"""
        mkdir_safe(local_path)

        # Initialize the local repository
        if bare:
            local_repo = cls.init_bare(local_path)
        else:
            local_repo = cls.init(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)

        repo.fetch(bare=bare)

        # Add origin
        # TODO

        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as .clone except clones to a bare repository by default
        """
        kwargs.setdefault("bare", True)
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):
        """Stage files (unless a tree is given) and create a commit."""
        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.warning("STAGING : %s" % modified_files)
            self.add(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message, author=author_msg, committer=committer_msg, encoding="UTF-8", tree=tree, *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat list of ``{"data", "name", "mode"}`` dicts as blobs in
        a new tree and return the tree id."""
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:
            # str only
            try:
                data = file_info["data"].encode("ascii")
                name = file_info["name"].encode("ascii")
                mode = file_info["mode"]
            except Exception:
                # Skip file on encoding errors (and other malformed entries),
                # without swallowing KeyboardInterrupt/SystemExit
                continue

            blob = Blob()
            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    # Like: git commmit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        user_info = {"name": name, "email": email}
        return self._commit(committer=user_info, author=user_info, message=message, files=files, *args, **kwargs)

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Main use is to do commits directly to bare repositories
        For example doing a first Initial Commit so the repo can be cloned and worked on right away
        """
        if not structure:
            return
        tree = self._tree_from_structure(structure)

        user_info = {"name": name, "email": email}

        return self._commit(committer=user_info, author=user_info, message=message, tree=tree, *args, **kwargs)

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        self.push(origin_uri)
        return self.pull(origin_uri)

    def lookup_entry(self, relpath, trackable_files=set()):
        """Return ``(blob sha1 hex digest, st_mode)`` for a working-tree file.

        :raises KeyError: if ``relpath`` is not in ``trackable_files``.
        """
        # NOTE: the default set is shared between calls but never mutated.
        if not relpath in trackable_files:
            raise KeyError

        abspath = self.abspath(relpath)

        with open(abspath, "rb") as git_file:
            data = git_file.read()
            s = sha1()
            # hashlib needs bytes; encoding the ASCII header works on both
            # Python 2 and Python 3
            s.update(("blob %u\0" % len(data)).encode("ascii"))
            s.update(data)
        return (s.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        return list(self.index)

    @property
    @funky.transform(set)
    def raw_files(self):
        return utils.paths.subpaths(self.path)

    @property
    @funky.transform(set)
    def ignored_files(self):
        return utils.paths.subpaths(self.path, filters=self.filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        return self.raw_files - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        return self.trackable_files - self.tracked_files

    # Disabled in the original source (kept for reference):
    #
    # @property
    # @funky.transform(set)
    # def modified_staged_files(self):
    #     "Checks if the file has changed since last commit"
    #     timestamp = self.last_commit.commit_time
    #     index = self.index
    #     return [
    #         f
    #         for f in self.tracked_files
    #         if index[f][1][0] > timestamp
    #     ]

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        obj_sto = self.repo.object_store
        tree_id = self[ref].tree
        names = self.trackable_files

        lookup_func = partial(self.lookup_entry, trackable_files=names)

        # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False)
        return list(tree_diff)

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        changed_entries = self._changed_entries()
        filtered_paths = [
            funky.first_true(names)
            for names, modes, sha in changed_entries
            if tuple(map(bool, names)) == pattern and funky.first_true(names)
        ]

        return filtered_paths

    @property
    @funky.transform(set)
    def removed_files(self):
        return self._changed_entries_by_pattern(self.PATTERN_REMOVED) - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        return self._changed_entries_by_pattern(self.PATTERN_ADDED) - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        modified_files = self._changed_entries_by_pattern(self.PATTERN_MODIFIED) - self.ignored_files
        return modified_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        timestamp = self.last_commit.commit_time
        return [f for f in self.tracked_files if os.stat(self.abspath(f)).st_mtime > timestamp]

    @property
    def pending_files(self):
        """
        Returns a list of all files that could be possibly staged
        """
        # Union of both
        return self.modified_files | self.added_files | self.removed_files

    @property
    def pending_files_by_state(self):
        files = {"modified": self.modified_files, "added": self.added_files, "removed": self.removed_files}

        # "Flip" the dictionary
        return {path: state for state, paths in files.items() for path in paths}

    # Disabled in the original source (kept for reference):
    #
    # @property
    # @funky.transform(set)
    # def modified_files(self):
    #     return self.modified_staged_files | self.modified_unstaged_files

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        return self.repo.stage(files)

    def add(self, *args, **kwargs):
        return self.stage(*args, **kwargs)

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Remove ``files`` from the index and write the index back."""
        index = self.index
        index_files = [f for f in files if f in index]
        for f in index_files:
            # Delete from the very object that is written below; going
            # through self.index again would open another index.
            del index[f]
        return index.write()

    def mv_fs(self, file_pair):
        old_name, new_name = file_pair
        os.rename(old_name, new_name)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Rename each ``(old, new)`` pair whose old name is in the index and
        update the index accordingly."""
        index = self.index
        # Real lists and loops: lazy map/filter would do nothing (or be
        # exhausted after one pass) on Python 3.
        files_in_index = [pair for pair in files_pair if pair[0] in index]
        for pair in files_in_index:
            self.mv_fs(pair)
        old_files = [funky.first(pair) for pair in files_in_index]
        new_files = [funky.last(pair) for pair in files_in_index]
        self.add(new_files)
        self.rm(old_files)
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        return build_index_from_tree(self.repo.path, self.repo.index_path(), self.repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        commit_sha = commit_sha or self.head
        commit_tree = self._commit_tree(commit_sha)
        # Rebuild index from the current tree
        return self._checkout_tree(commit_tree)

    def checkout(self, commit_sha=None, files=None):
        """Checkout only a select amount of files
        """
        # Not implemented: only normalises its arguments and returns self.
        commit_sha = commit_sha or self.head
        files = files or []

        return self

    @funky.arglist_method
    def reset(self, files, commit="HEAD"):
        # Not implemented.
        pass

    def rm_all(self):
        """Empty the index and write it back."""
        # Clear and write the same index object.
        index = self.index
        index.clear()
        return index.write()

    def _to_commit(self, commit_obj):
        """Allows methods to accept both SHA's or dulwich Commit objects as arguments
        """
        if isinstance(commit_obj, basestring):
            return self.repo[commit_obj]
        return commit_obj

    def _commit_sha(self, commit_obj):
        """Extracts a Dulwich commits SHA
        """
        if utils.git.is_sha(commit_obj):
            return commit_obj
        elif isinstance(commit_obj, basestring):
            # Can't use self[commit_obj] to avoid infinite recursion
            commit_obj = self.repo[commit_obj]
        return commit_obj.id

    def _blob_data(self, sha):
        """Return a blobs content for a given SHA
        """
        return self[sha].data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """ Recursively gets the nth parent for a given commit
            Warning: Remember that parents aren't the previous commits
        """
        if n is None:
            n = 1
        commit = self._to_commit(commit)
        parents = commit.parents

        if n <= 0 or not parents:
            # Return a SHA
            return self._commit_sha(commit)

        parent_sha = parents[0]
        parent = self[parent_sha]

        # Recur
        return self.get_parent_commit(parent, n - 1)

    def get_previous_commit(self, commit_ref, n=None):
        commit_sha = self._parse_reference(commit_ref)
        n = n or 1
        commits = self.commits()
        return funky.next(commits, commit_sha, n=n, default=commit_sha)

    def _parse_reference(self, ref_string):
        # COMMIT_REF~x
        if "~" in ref_string:
            ref, count = ref_string.split("~")
            count = int(count)
            commit_sha = self._commit_sha(ref)
            return self.get_previous_commit(commit_sha, count)
        return self._commit_sha(ref_string)

    def _commit_tree(self, commit_sha):
        """Return the tree object for a given commit
        """
        return self[commit_sha].tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff ``commit_sha`` against ``compare_to`` (default: the previous
        commit) using the diff function registered under ``diff_type``."""
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary, which used to be silently dropped here.
        return self._diff_between(compare_to, commit_sha, diff_function=diff_func, filter_binary=filter_binary)

    def diff_working(self, ref=None, filter_binary=True):
        """Diff between the current working directory and the HEAD
        """
        return utils.git.diff_changes_paths(
            self.repo.object_store, self.path, self._changed_entries(ref=ref), filter_binary=filter_binary
        )

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Returns a dict of the following Format :
            {
                "directory/filename.txt": {
                    'name': 'filename.txt',
                    'path': "directory/filename.txt",
                    "sha": "xxxxxxxxxxxxxxxxxxxx",
                    "data": "blablabla",
                    "mode": 0xxxxx",
                },
                ...
            }
        """
        # Default values
        context = {}
        is_tree = is_tree or False
        parent_path = parent_path or ""

        if is_tree:
            tree = self[commit_sha]
        else:
            tree = self[self._commit_tree(commit_sha)]

        for mode, path, sha in tree.entries():
            # Check if entry is a directory
            if mode == self.MODE_DIRECTORY:
                context.update(
                    self.get_commit_files(sha, parent_path=os.path.join(parent_path, path), is_tree=True, paths=paths)
                )
                continue

            subpath = os.path.join(parent_path, path)

            # Only add the files we want
            if not (paths is None or subpath in paths):
                continue

            # Add file entry
            context[subpath] = {"name": path, "path": subpath, "mode": mode, "sha": sha, "data": self._blob_data(sha)}
        return context

    def file_versions(self, path):
        """Returns all commits where given file was modified
        """
        versions = []
        commits_info = self.commit_info()
        seen_shas = set()

        for commit in commits_info:
            try:
                files = self.get_commit_files(commit["sha"], paths=[path])
                # list(...) so indexing also works on Python 3 dict views
                file_path, file_data = list(files.items())[0]
            except IndexError:
                continue

            file_sha = file_data["sha"]

            if file_sha in seen_shas:
                continue
            else:
                seen_shas.add(file_sha)

            # Add file info
            commit["file"] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits
            Please use .diff method unless you have very speciic needs
        """

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """ List of changes between two SHAs
            Returns a list of lists of tuples :
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        kwargs["diff_type"] = "changes"
        return self.diff(*args, **kwargs)

    def changes_count(self, *args, **kwargs):
        return len(self.changes(*args, **kwargs))

    def _refs_by_pattern(self, pattern):
        """Return the refs starting with ``pattern``, keyed by the remainder
        of their name."""
        refs = self.refs
        return {
            key[len(pattern):]: value
            for key, value in refs.items()
            if key.startswith(pattern)
        }

    @property
    def refs(self):
        return self.repo.get_refs()

    def set_refs(self, refs_dict):
        """Assign every ``name -> value`` pair of ``refs_dict`` on the repo."""
        # "self" was missing from the signature, so any call failed.
        for k, v in refs_dict.items():
            self.repo[k] = v

    def import_refs(self, base, other):
        return self.repo.refs.import_refs(base, other)

    @property
    def branches(self):
        return self._refs_by_pattern(self.REFS_BRANCHES)

    def _active_branch(self, refs=None, head=None):
        """Return ``(branch, sha)`` of a branch pointing at HEAD, or
        ``(None, None)`` when there is none."""
        head = head or self.head
        refs = refs or self.branches
        matching = {branch: branch_head for branch, branch_head in refs.items() if branch_head == head}
        try:
            # list(...) so indexing also works on Python 3 dict views
            return list(matching.items())[0]
        except IndexError:
            pass
        return (None, None)

    @property
    def active_branch(self):
        return self._active_branch()[0]

    @property
    def active_sha(self):
        return self._active_branch()[1]

    @property
    def remote_branches(self):
        return self._refs_by_pattern(self.REFS_REMOTES)

    @property
    def tags(self):
        return self._refs_by_pattern(self.REFS_TAGS)

    @property
    def remotes(self):
        """ Dict of remotes
        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        """
        config = self.repo.get_config()
        return {keys[1]: values["url"] for keys, values in config.items() if keys[0] == "remote"}

    def add_ref(self, new_ref, old_ref):
        self.repo.refs[new_ref] = self.repo.refs[old_ref]
        self.update_server_info()

    def remove_ref(self, ref_name):
        # Returns False if ref doesn't exist
        if not ref_name in self.repo.refs:
            return False
        del self.repo.refs[ref_name]
        self.update_server_info()
        return True

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Try creating a new branch which tracks the given remote
            if such a branch does not exist then branch off a local branch

        :raises Exception: if ``new_branch`` exists or ``base_branch`` cannot
            be found locally or on the tracked remote.
        """

        # The remote to track (the argument used to be overwritten
        # unconditionally)
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Get information about remote_branch.
        # Ref names always use "/", whatever the OS path separator is.
        remote_branch = "/".join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception(
                "Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)
            )

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def remove_branch(self, branch_name):
        ref = self._format_ref_branch(branch_name)
        return self.remove_ref(ref)

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Changes the current branch
        """
        # NOTE(review): ``create`` is normalised but never consulted; a
        # missing branch is always created. Confirm the intended behaviour
        # for create=False before changing this.
        if create is None:
            create = True

        # Check if branch exists
        if not branch_name in self.branches:
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref("HEAD", branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def clean(self, force=None, directories=None):
        """Delete every untracked file and return the set of removed paths."""
        untracked_files = self.untracked_files
        # Explicit loop: a lazy map() would remove nothing on Python 3.
        for untracked in untracked_files:
            os.remove(untracked)
        return untracked_files

    def clean_working(self):
        """Purges all the working (removes everything except .git)
            used by checkout_all to get clean branch switching
        """
        return self.clean()

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        """Return a nested dict describing the tree: sub-trees become dicts,
        other entries map to their sha, plus "." and ".." entries."""
        tree = self[tree_sha]
        structure = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return structure
        for mode, path, sha in tree.entries():
            # tree
            if mode == self.MODE_DIRECTORY:
                # Recur
                structure[path] = self._get_fs_structure(sha, depth=depth - 1, parent_sha=tree_sha)
            # commit
            else:
                structure[path] = sha
        structure["."] = tree_sha
        structure[".."] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        parts = path.split(os.path.sep)
        depth = len(parts) + 1
        structure = self._get_fs_structure(tree_sha, depth=depth)

        return funky.subkey(structure, parts)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" for a given commit
            using the tree of that commit
        """
        tree_sha = self._commit_tree(ref)

        # Root path
        if subpath in self.ROOT_PATHS or not subpath:
            return self._get_fs_structure(tree_sha, depth=1)
        # Any other path
        return self._get_fs_structure_by_path(tree_sha, subpath)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit
        """
        # list(...) so indexing also works on Python 3 dict views
        name, info = list(self.get_commit_files(ref, paths=[path]).items())[0]
        return info

    def commit_tree(self, ref, *args, **kwargs):
        tree_sha = self._commit_tree(ref)
        return self._get_fs_structure(tree_sha, *args, **kwargs)

    def update_server_info(self):
        # Only bare repositories get their server info refreshed.
        if not self.is_bare:
            return
        update_server_info(self.repo)

    def _is_fast_forward(self):
        pass

    def _merge_fast_forward(self):
        pass

    def __hash__(self):
        """This is required otherwise the memoize function will just mess it up
        """
        return hash(self.path)

    def __getitem__(self, key):
        sha = self._parse_reference(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        self.repo[key] = value

    # Alias to clone_bare
    fork = clone_bare
    log = commit_info
    diff_count = changes_count
    # Misspelled alias kept for backward compatibility.
    comtributors = recent_contributors
    contributors = recent_contributors
class GitHubPagesWriter:
    """Collect generated files in memory and commit them to a Pages branch.

    Files are accumulated with ``write_file`` into a nested dict that
    mirrors the directory layout; ``commit`` turns that dict into git tree
    objects and records a commit on the configured branch.  Used as a
    context manager, the commit happens on a clean exit only.
    """

    def __init__(self, *, repo='.', branch='gh-pages', remote='origin'):
        self.repo = Repo(repo)
        self.branch = branch
        self.remote = remote
        # Nested mapping: directory name -> sub-dict, file name -> blob id.
        self.tree = {}

    @property
    def base_url(self):
        """Return the github.io URL derived from the remote's configured URL."""
        section = (b'remote', self.remote.encode())
        raw_url = self.repo.get_config().get(section, b'url')
        client, remote_path = get_transport_and_path(raw_url.decode())
        parsed = urlparse(client.get_url(remote_path))
        assert parsed.hostname == 'github.com'

        slug = parsed.path[1:]
        if slug.endswith(".git"):
            slug = slug[:-4]
        user, repo = slug.split('/')

        # A "<user>.github.io" repository is served from the site root.
        if repo == f'{user}.github.io':
            return f'https://{user}.github.io'
        return f'https://{user}.github.io/{repo}'

    def write_file(self, url, content):
        """Store *content* as a blob and register it under the path *url*.

        A *url* ending in ``/`` is stored as ``index.html`` inside that
        directory.
        """
        segs = PurePath(url).parts[1:]
        if url.endswith("/"):
            segs += ("index.html", )

        # Descend through (and create as needed) the intermediate directories.
        node = self.tree
        for directory in segs[:-1]:
            node = node.setdefault(directory, {})

        blob = Blob.from_string(content)
        self.repo.object_store.add_object(blob)
        node[segs[-1]] = blob.id

    def write_tree(self, files):
        """Recursively write *files* as git tree objects; return the tree id."""
        tree = Tree()
        for name, value in files.items():
            entry_name = name.encode('utf-8')
            if isinstance(value, dict):
                # Sub-directory: write it first, then reference its tree id.
                tree.add(entry_name, 0o040000, self.write_tree(value))
            else:
                tree.add(entry_name, 0o100644, value)
        self.repo.object_store.add_object(tree)
        return tree.id

    def commit(self):
        """Commit everything written so far to the target branch."""
        self.write_file("/.nojekyll", b'')
        root_id = self.write_tree(self.tree)
        ref = ("refs/heads/" + self.branch).encode()
        commit_id = self.repo.do_commit(message=b'generate GitHub Pages',
                                        tree=root_id,
                                        ref=ref)
        self.repo[ref] = commit_id

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only publish when the with-block finished without an exception.
        if exc_type is not None:
            return
        self.commit()
class Repo(object):
    """
    An abstraction layer on top of dulwich.repo.Repo for higher-level
    git repository actions like:

    * adding only modified files
    * checking out whole trees (or paths within them) from refs
    * diffs with difflib
    * branching and tagging (both displaying and creating)
    * listing commits down from a ref

    Methods are structured to match git commands when appropriate.

    It also supports executing arbitrary git commands, if git is installed.
    Of course, everything else is implemented in pure python, so having git
    installed is optional.

    Should be considered a work-in-progress.
    """

    def __init__(self, path):
        self.repo = DulwichRepo(path)  # The inner Dulwich Repo object.
        self.root = path

    @classmethod
    def init(cls, path, mkdir=False, bare=False):
        """
        Initializes a normal or bare repository. This is mostly a
        handoff to Dulwich.

        :param path: the path (which must be a directory) to create
                     the repository within.
        :param mkdir: if True, make a directory at **path**. Equivalent
                      to ``mkdir [path] && cd [path] && git init``.
        :param bare: if True, create a bare repository at the path.

        :return: a ``Repo`` instance.
        """
        if bare:
            DulwichRepo.init_bare(path)
        else:
            DulwichRepo.init(path, mkdir)
        return cls(path)

    def add(self, path=None, all=False, add_new_files=True):
        """
        Add files to the repository or staging area if new or modified.
        Equivalent to the ``git add`` command.

        :param path: the path to the file to add, relative to the
                     repository root.
        :param all: if True, add all files under the given path. If
                    **path** is omitted, the repository's root path
                    will be used.
        :param add_new_files: if True, this command will also add new
                              files. Note this is the default behavior.
                              The option is provided for situations
                              (e.g. ``git commit -a``) where adding
                              new files would be undesirable.
        :return: list of filepaths (relative to the repository root)
                 that were added.

        If **path** is a file and **all** is True, only the single
        file will be added.
        If **path** is a directory and **all** is False, nothing will be
        added.
        Likewise, if both **path** and **all** are omitted, nothing will
        be added.

        Additionally, the ``add`` method checks to see if the path(s)
        have been modified. We don't want to create new blobs if we
        don't need them.
        """
        # Paths are a little tricky. To work with repositories independent
        # of the current working directory, we need absolute paths to files.
        # At the same time, git trees are relative to the repository root.
        # So, we have to do a few conversions.
        adds = []

        # get a root-anchored path for doing isfile/isdir checks.
        if path is not None:
            path = os.path.join(self.root, path)

        if path is not None and all:
            # add all files within the given path
            if os.path.isdir(path):
                adds.extend(self._walk_files(path))
            elif os.path.isfile(path):
                adds.append(path)
        elif path is None and all:
            # add all files within the repository root
            adds.extend(self._walk_files(self.root))
        elif path is not None:
            # add the file at path, but only if it really is a file
            if os.path.isfile(path):
                adds.append(path)

        # back to relative paths, so we can add them to the tree.
        adds = [os.path.relpath(p, self.root) for p in adds]

        # filter unmodified files (and untracked files if not add_new_files)
        if add_new_files:
            adds = [f for f in adds
                    if self._file_is_modified(f)
                    or not self._file_in_tree(f)]
        else:
            adds = [f for f in adds if self._file_is_modified(f)]

        # don't waste time with stage if empty list.
        if adds:
            self.repo.stage(adds)

        return adds

    def branch(self, name=None, ref=None):
        """
        Create a new branch or display the current one. Equivalent to
        `git branch`.

        :param name: the name of the branch
        :param ref: a commit reference (branch, tag, or SHA). Same idea
                    as the git-branch ``--start-point`` option. Will
                    create the branch off of the commit. Defaults to HEAD.
        :return: None on create, branch name on display.

        When the name param is not given, the current branch will be
        returned as a string using the branch's full name
        (i.e. ``refs/heads/[branch_name]``).
        """
        # create a branch
        if name is not None:
            if ref is None:
                ref = self.head().id
            else:
                ref = self._resolve_ref(ref)
            self.repo.refs['refs/heads/%s' % name] = ref
        # display the name of the current branch
        else:
            # couldn't find an easy way to get it out of dulwich,
            # which resolves HEAD to the commit, so we'll just read
            # .git/HEAD directly.
            path = os.path.join(self.repo._controldir, 'HEAD')
            if os.path.isfile(path):
                with open(path, 'r') as fp:
                    # drop the leading "ref: " (5 characters)
                    return fp.read().strip()[5:]

    def checkout(self, ref, path=None):
        """
        Checkout the entire tree (or a subset) of a commit given a branch,
        tag, or commit SHA.

        This is a fairly naive implementation. It will just write the blob
        data recursively from the tree pointed at by the given reference,
        overwriting the working tree as necessary. It doesn't do deletions
        or renames.

        If you wanted to checkout 'HEAD':
          >>> repo.checkout(repo.head())

        If you wanted to checkout the master branch:
          >>> repo.checkout('master')

        If you wanted to checkout v1.2 (i.e. a tag):
          >>> repo.checkout('v1.2')

        :param ref: branch, tag, or commit
        :param path: checkout only file or directory at path, should be
                     relative to the repo's root.
        :raises KeyError: if bad reference.
        """
        sha = self._resolve_ref(ref)
        obj = self.repo[sha]
        tree = self.repo[obj.tree]

        if tree is None:
            raise KeyError('Bad reference: %s' % ref)
        if path is None:
            path = self.root
        else:
            # NOTE(review): ``path`` is used as given for the filesystem
            # checks and writes below, so it is effectively resolved
            # against the current working directory -- confirm callers
            # expect that.
            # check if path and self.root are same
            if not os.path.samefile(path, self.root):
                # if not, we need the path's tree
                # (a sub-tree of the commit tree)
                tree = self._obj_from_tree(tree, path)
        # write the tree
        self._write_tree_to_wt(tree, path)

    def cmd(self, cmd):
        """
        Run a raw git command from the shell and return any output. Unlike
        other methods (which depend on Dulwich's git reimplementation and
        not git itself), this is dependent on the git shell command.

        The given git subcommand and arguments are prefixed with ``git`` and
        run through the subprocess module. To maintain the class's
        indifference to the current working directory, we also prepend the
        ``--git-dir`` and ``--work-tree`` arguments.

        :param cmd: A list of command-line arguments (anything the
                    subprocess module will take).
        :return: a string containing the command's output.
        :raises TypeError: if **cmd** is not a list.

        **Usage** (output has been truncated for brevity):
          >>> repo.cmd(['checkout', '-q', 'master'])
          >>> repo.cmd(['commit', '-q', '-a', '-m', 'Initial Commit'])
          >>> repo.cmd(['remote', '-v'])
          "origin  <remote-url> (fetch)\\n\\n origin ..."
          >>> repo.cmd(['log'])
          "commit 68a116eaee458607a3a9cf852df4f358a02bdb92\\nAuthor: Ni..."

        As you can see, it doesn't do any parsing of the output. It's
        available for times when the other methods don't get the job done.
        """
        if not isinstance(cmd, list):
            raise TypeError('cmd must be a list')
        git_dir = os.path.join(self.root, '.git')
        prefix = ['git', '--git-dir', git_dir, '--work-tree', self.root]
        # It would be nice to use check_output() here, but it's 2.7+
        return subprocess.Popen(prefix + cmd,
                                stdout=subprocess.PIPE).communicate()[0]

    def commit(self, all=False, **kwargs):
        """
        Commit the changeset to the repository.  Equivalent to the
        `git commit` command.

        This method does a commit; use the ``commits`` method to
        retrieve one or more commits.

        Uses the underlying dulwich ``do_commit()``, see that for params.
        At minimum, you need to provide **committer** and **message**.
        Everything else will be defaulted.

        :param all: commit all modified files that are already being
                    tracked.
        :param kwargs: the commit attributes (e.g. committer, message,
                       etc.). Again, see the underlying dulwich method.
        :return: the new Commit object.
        """
        if all:
            # add all changes (to already tracked files)
            self.add(all=True, add_new_files=False)

        # pass the kwargs to dulwich, get the returned commit id.
        commit_id = self.repo.do_commit(**kwargs)

        # return the Commit object (instead of the id, which is less useful).
        return self.repo[commit_id]

    def commits(self, ref=None, n=10):
        """
        Return up to n-commits down from a ref (branch, tag, commit),
        or if no ref given, down from the HEAD.

        If you just want a single commit, it may be cleaner to use the
        ``object`` method.

        :param ref: a branch, tag (not yet), or commit SHA to use
                    as a start point.
        :param n: the maximum number of commits to return. If fewer
                  matching commits exist, only they will be returned.

        :return: a list of ``dulwich.objects.Commit`` objects.

        **Usage**:
          >>> repo.commits()
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>,
           <Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
          >>> repo.commits()[0] == repo.head()
          True
          >>> repo.commits(n=1)
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>]
          >>> repo.commits('6336f47615da32d520a8d52223b9817ee50ca728', n=1)
          [<Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
        """
        start_point = self.head().id
        if ref is not None:
            start_point = self._resolve_ref(ref)
        return self.repo.revision_history(start_point)[:n]

    def diff(self, a, b=None, path=None):
        """
        Return a diff of commits a and b.

        :param a: a commit identifier.
        :param b: a commit identifier. When omitted, **a** is compared
                  against the file currently in the working tree.
        :param path: a path to a file to diff, relative to the repo root.
        :raises NotImplementedError: if **path** is omitted or does not
                                     point at a file in the working tree
                                     (whole-tree diffs are not supported
                                     yet).
        """
        # Guard against the default path=None before touching os.path,
        # which would otherwise fail with an unrelated error.
        if path is None or \
                not os.path.isfile(os.path.join(self.root, path)):
            raise NotImplementedError('Specify a file path for now')
        return self._diff_file(path, a, b)

    def head(self):
        """Return the HEAD commit or raise ``NoHeadSet``."""
        # It seems best to make this a function so we don't have to
        # set and continually update it.
        try:
            return self.repo['HEAD']
        except KeyError:
            # The HEAD will be missing before the repo is committed to.
            raise NoHeadSet

    def is_dirty(self):
        """Return True if there are uncommitted changes to the repository."""
        new, modified, deleted = self.status()
        return bool(new or modified or deleted)

    def object(self, sha):
        """
        Retrieve an object from the repository.

        :param sha: the 40-byte hex-rep of the object's SHA1 identifier.
        """
        return self.repo[sha]

    def status(self, from_path=None):
        """
        Compare the working directory with HEAD.

        :param from_path: show changes within this path, which must be a
                          file or directory relative to the repo.
        :return: a tuple containing three lists: new, modified, deleted.
                 All entries are paths relative to the repository root.
        :raises OSError: if **from_path** does not exist.
        """
        # TODO: also compare the index and HEAD, or the index and WT

        # use from_path if set, otherwise root.
        if from_path is not None:
            path = os.path.join(self.root, from_path)
            if not os.path.exists(path):
                raise OSError('from_path does not exist.')
        else:
            path = self.root

        if os.path.isfile(path):
            candidates = [path]
        elif os.path.isdir(path):
            candidates = self._walk_files(path)
        else:
            candidates = []

        new, modified, deleted = [], [], []
        for full_path in candidates:
            # Tree lookups are keyed on root-relative paths, so always
            # convert before asking for the status.
            fpath = os.path.relpath(full_path, self.root)
            status = self._file_status(fpath)
            if status == FILE_IS_NEW:
                new.append(fpath)
            elif status == FILE_IS_MODIFIED:
                modified.append(fpath)
            elif status == FILE_IS_DELETED:
                deleted.append(fpath)

        return new, modified, deleted

    def tag(self, name, ref=None):
        """
        Create a tag.

        :param name: name of the new tag (e.g. 'v1.0' or '1.0.6')
        :param ref: a commit ref to tag, defaults to HEAD.
        """
        # TODO: display tags attached to HEAD when no args.
        if ref is None:
            ref = self.head().id
        ref = self._resolve_ref(ref)
        self.repo.refs['refs/tags/%s' % name] = ref

    def tree(self, sha=None):
        """
        Return the tree with given SHA, or if no SHA given, return the
        HEAD commit's tree. Raise an error if an object matches the SHA,
        but is not a tree.

        :param sha: tree reference.

        Note that a commit reference would not work. To get a commit's
        tree, just provide ``c.tree``, which contains the SHA we need.
        """
        if sha is None:
            obj = self.repo[self.head().tree]
        else:
            obj = self.repo[sha]
        if isinstance(obj, Tree):
            return obj
        raise NotTreeError('Object is not a Tree')

    def _walk_files(self, top):
        """
        Yield the joined path of every file below **top**, skipping any
        ``.git`` directory.
        """
        for directory, dirnames, filenames in os.walk(top):
            if '.git' in dirnames:
                # pruning in place stops os.walk from descending into it
                dirnames.remove('.git')
            for filename in filenames:
                yield os.path.join(directory, filename)

    def _file_status(self, path, ref=None):
        """
        Checks the status of a file in the working tree relative to a
        commit (usually HEAD). Statuses include: new, modified, and deleted.

        These statuses are conveyed as constants::

          FILE_IS_UNCHANGED = 0
          FILE_IS_NEW       = 1
          FILE_IS_MODIFIED  = 2
          FILE_IS_DELETED   = 3

        :param path: file path relative to the repo
        :param ref: optional ref to compare the WT with, default is HEAD.
                    (Currently unused.)
        :return: status constant
        :raises KeyError: if the path is in neither the tree nor the
                          working tree.
        """
        full_path = os.path.join(self.root, path)
        in_work_tree = os.path.exists(full_path)
        in_tree = self._file_in_tree(path)

        # new
        if not in_tree and in_work_tree:
            return FILE_IS_NEW
        # deleted
        elif in_tree and not in_work_tree:
            return FILE_IS_DELETED
        # modified
        elif in_tree and in_work_tree and self._file_is_modified(path):
            return FILE_IS_MODIFIED
        # unchanged
        elif in_tree and in_work_tree:
            return FILE_IS_UNCHANGED
        # does not exist (at least in our 2-tree world)
        else:
            raise KeyError('Path not found in either tree.')

    def _file_is_modified(self, path, ref=None):
        """
        Returns True if the current file (in the WT) has been modified from
        the blob in the commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: optional ref to compare the WT with, default is HEAD.
                    (Currently unused.)

        This returns False for new files (not present in the tree). If this
        is unexpected, just call ``_file_in_tree`` first.

        It assumes that the given path does exist. Just expect an OSError
        if it doesn't.
        """
        # handle no head scenario when this gets called before first commit
        try:
            self.head()
        except NoHeadSet:
            return False

        # get the tree
        tree = self.repo[self.head().tree]
        # get the blob from the tree
        blob1 = self._obj_from_tree(tree, path)
        if not isinstance(blob1, Blob):
            return False

        # make a second blob from the current file
        with open(os.path.join(self.root, path), 'r') as fp:
            blob2 = Blob.from_string(fp.read())
        # are the two blobs equivalent?
        # if their contents are the same they should be...
        # calls dulwich.objects.ShaFile.__eq__, which just compares SHAs
        return blob1 != blob2

    def _file_in_tree(self, path, ref=None):
        """
        Returns True if the file corresponds to a blob in the HEAD
        commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: optional ref to compare the WT with, default is HEAD.
                    (Currently unused.)
        """
        # handle no head scenario when this gets called before first commit
        try:
            self.head()
        except NoHeadSet:
            return False

        # get the tree
        tree = self.repo[self.head().tree]
        return self._obj_from_tree(tree, path) is not None

    def _apply_to_tree(self, tree, f, path=None):
        """
        Walk a tree recursively and apply function, f, to each entry

        :param tree: a dulwich.objects.Tree object
        :param f: function that will be called with each entry.
        :param path: if provided, the path relative to the repository
                     will be included in the function call.
        :raises NotTreeError: if **tree** is not a Tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError
        for entry in tree.iteritems():
            f(entry, path) if path else f(entry)
            obj = self.repo[entry.sha]
            if isinstance(obj, Tree):
                # extend the path with the sub-tree's own name
                new_path = os.path.join(path, entry.path) if path else None
                self._apply_to_tree(obj, f, new_path)

    def _obj_from_tree(self, tree, path):
        """
        Walk a tree recursively to retrieve and return a blob or sub-tree
        from the given path, or return None if one does not exist.

        :param tree: a dulwich.objects.Tree object.
        :param path: path relative to the repository root.

        :return: Tree object, Blob object, or None if the path could
                 not be found.
        :raises NotTreeError: if **tree** is not a Tree.

        For example, providing ``hopper/git.py`` would return the
        ``git.py`` blob within the ``hopper`` sub-tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError('Object is not a tree')
        # remove a trailing slash from path (so the last component isn't '')
        if path.endswith(os.sep):
            path = path[:-1]

        # we need the head of the path, which is either the file itself or a
        # directory, plus whatever remains for the recursive call.
        parts = path.split(os.sep)
        head = parts[0]
        new_path = os.sep.join(parts[1:])

        for entry in tree.iteritems():
            # these are dulwich.objects.TreeEntry objects
            if entry.path == head:
                # get the Tree or Blob.
                obj = self.repo[entry.sha]
                # return if we're at the right path
                if head == path:
                    return obj
                # otherwise recurse if it's a Tree
                elif isinstance(obj, Tree):
                    return self._obj_from_tree(obj, new_path)

        # if we get here the path wasn't there.
        return None

    def _write_tree_to_wt(self, tree, basepath):
        """
        Walk a tree recursively and write each blob's data to the working
        tree.

        :param tree: a dulwich.objects.Tree object.
        :param basepath: blob data is written to:
                         ``os.path.join(basepath, blob_path)``.
                         Recursive calls will append the sub-tree
                         name to the original call.
        :raises NotTreeError: if **tree** is not a Tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError('Object is not a tree')
        for entry in tree.iteritems():
            obj = self.repo[entry.sha]
            if isinstance(obj, Blob):
                path = os.path.join(basepath, entry.path)
                with open(path, 'wb') as fp:
                    fp.write(obj.data)
            elif isinstance(obj, Tree):
                new_basepath = os.path.join(basepath, entry.path)
                # the directory may not exist in the working tree yet
                if not os.path.isdir(new_basepath):
                    os.makedirs(new_basepath)
                self._write_tree_to_wt(obj, new_basepath)

    def _resolve_ref(self, ref):
        """
        Resolve a reference to a commit SHA.

        :param ref: branch, tag, commit reference.
        :return: a commit SHA.
        :raises KeyError: if ref doesn't point to a commit.
        :raises TypeError: if ref is not a string.
        """
        # order: branch -> tag -> commit
        # (tag and branch can have same name, git assumes branch)
        if not isinstance(ref, str):
            raise TypeError('ref must be a string')

        # dulwich.Repo.refs keys the full name
        # (i.e. 'refs/heads/master') for branches and tags
        branch = _expand_branch_name(ref)
        tag = _expand_tag_name(ref)

        # branch?
        if branch in self.repo.refs:
            # get the commit SHA that the branch points to
            return self.repo[branch].id
        # tag?
        elif tag in self.repo.refs:
            return self.repo[tag].id
        # commit?
        else:
            obj = self.repo[ref]
            if isinstance(obj, Commit):
                return obj.id
            raise KeyError('Bad reference: %s' % ref)

    def _diff_file(self, path, a, b=None, html=False):
        """
        Use difflib to compare a file between two commits, or a
        single commit and the working tree.

        :param a: ref to commit a.
        :param b: ref to commit b, defaults to the working tree.
        :param path: path to file, relative to repo root.
        :param html: format using difflib.HtmlDiff. (Currently unused.)
        :raise NotBlobError: if two commits are compared and path wasn't
                             present in either tree.
        """
        # resolve commit
        a = self._resolve_ref(a)
        # get the trees
        tree1 = self.repo[self.repo[a].tree]
        # get the blob
        blob1 = self._obj_from_tree(tree1, path)
        # set data or empty string (meaning no blob at path)
        data1 = blob1.data if isinstance(blob1, Blob) else ''

        if b is None:
            # compare against the working tree; there is no second blob
            with open(os.path.join(self.root, path), 'r') as fp:
                data2 = fp.read()
        else:
            b = self._resolve_ref(b)
            tree2 = self.repo[self.repo[b].tree]
            blob2 = self._obj_from_tree(tree2, path)
            data2 = blob2.data if isinstance(blob2, Blob) else ''
            # if both blobs were missing => bad path
            if not isinstance(blob1, Blob) and not isinstance(blob2, Blob):
                raise NotBlobError(
                    'Path did not point to a blob in either tree')

        diff = list(difflib.context_diff(data1.splitlines(),
                                         data2.splitlines()))
        return '\n'.join(diff)
class Wiki(HookMixin):
    """A wiki whose pages are stored in a git repository."""

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at *path*, creating it if none exists.

        :param path: Filesystem location of the wiki repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: The value returned by the repository's ``do_commit``.
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')

        # Build the identity from the already-encoded parts.  Calling
        # str.format on a %-style template never substitutes anything and
        # recorded every commit as the literal "%s <%s>".
        author = committer = name + b" <" + email + b">"

        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: A ``WikiPage`` for *name* at *sha*.
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts, one per entry in the repository
            index, with keys ``name``, ``filename``, ``ctime``, ``mtime``,
            ``sha`` and ``size``.
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            entry = index[name]
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     ctime=entry.ctime[0],
                     mtime=entry.mtime[0],
                     sha=entry.sha,
                     size=entry.size))

        return rv
class Wiki(HookMixin):
    """A wiki whose pages are stored in a git repository."""

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at *path*, creating it if none exists.

        :param path: Filesystem location of the wiki repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: {0}".format(self.path)

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: The value returned by the repository's ``do_commit``.
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')

        # Join the already-encoded parts directly.  Formatting them into a
        # text template embeds the bytes repr ("b'Anon' <b'...'>") on
        # Python 3, and re-encoding the result can fail for non-ASCII
        # names on Python 2.
        author = committer = name + b" <" + email + b">"

        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: A ``WikiPage`` for *name* at *sha*.
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts, one per entry in the repository
            index, with keys ``name``, ``filename``, ``ctime``, ``mtime``,
            ``sha`` and ``size``.
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            entry = index[name]
            rv.append(dict(name=filename_to_cname(name),
                           filename=name,
                           ctime=entry.ctime[0],
                           mtime=entry.mtime[0],
                           sha=entry.sha,
                           size=entry.size))

        return rv
class BaseFetcher():
    """Base class for fetchers that download source data.

    A fetcher may keep a temporary working directory and/or a local git
    repository, each created as a sub-directory (named after the fetcher)
    of the given parent directory.  The ``name`` property reads
    ``self._name``, which this class never assigns.
    """

    def __init__(self, temp_parent_dir=None, repo_parent_dir=None):
        """Create the fetcher's directories and load or create its repo.

        :param temp_parent_dir: parent directory for the temporary
            location; skipped when None.
        :param repo_parent_dir: parent directory for the local repo; when
            None the fetcher has no repo.
        """
        if temp_parent_dir is not None:
            self._temp_location = temp_parent_dir + "/" + self.name
            Path(self._temp_location).mkdir(exist_ok=True)

        if repo_parent_dir is not None:
            self._repo_location = repo_parent_dir + "/" + self.name
            Path(self._repo_location).mkdir(exist_ok=True)
            # repo_load signals a missing repo with ValueError; in that
            # case start a fresh one.
            try:
                self.repo_load()
            except ValueError:
                self.repo_initialize()
        else:
            self.has_repo = False
            self.repo = None

    @property
    def name(self):
        return self._name

    @property
    def repo_location(self):
        return self._repo_location

    def retrieve_metadata(self):
        raise NotImplementedError()

    def retrieve_data(self, dataset):
        raise NotImplementedError()

    def update_data(self, from_date):
        raise NotImplementedError()

    def download_file(self, url, params, stream=False, local_repo_file=None):
        """
        Downloads a file and returns a BytesIO buffer.
        Specify if download needs to happen on a stream, default False.

        If the fetcher has a local repo, the local_repo_file string can be
        provided to allow storing the file in the repo (relative file path
        to the repo base path).
        If no local_repo_file is provided (None), the file is NOT stored in
        the repo regardless of the fetcher having a repo or not.

        :return: a BytesIO positioned at the start of the downloaded data.
        :raises RuntimeError: if the response status is not 200.
        """
        log.info("Downloading file from url: {} - params: {}".format(
            url, params))
        rsp = requests.get(url, params, stream=stream)
        try:
            if rsp.status_code != 200:
                raise RuntimeError(
                    "Downloading the requested file failed with response "
                    "status {}.".format(rsp.status_code))

            fb = BytesIO()
            for chunk in rsp.iter_content(chunk_size=None):
                if chunk:
                    fb.write(chunk)
        finally:
            # Release the connection even when the download is rejected
            # or interrupted (matters for streamed responses).
            rsp.close()
        fb.seek(0)

        if self.has_repo and local_repo_file is not None:
            log.info("Saving to local file {}".format(local_repo_file))
            ds = local_repo_file.split('/')
            file_name = ds[-1]
            file_sublocation = "/".join(ds[0:-1])
            dir_path = Path(self._repo_location + '/' + file_sublocation)
            dir_path.mkdir(parents=True, exist_ok=True)
            file_path = dir_path.joinpath(file_name)
            with open(file_path, 'wb') as f:
                # getvalue() leaves the buffer position at the start
                f.write(fb.getvalue())
            self.repo.stage([local_repo_file])

        return fb

    def repo_initialize(self):
        """
        Initializes a repo on disk where source data should be stored.
        The subdirectory of parent_location is based off the name of
        the Fetcher.

        :raises ValueError: if the location already contains a ``.git``
            directory.
        """
        pth = Path(self.repo_location)
        gitpth = Path(self.repo_location + '/.git')
        if not pth.is_dir():
            pth.mkdir()
        if gitpth.is_dir():
            raise ValueError("Requested location already contains a repo.")
        self.repo = Repo.init(self.repo_location)
        self.has_repo = True

    def repo_load(self):
        """
        Loads an existing repo on disk.
        The repo should be located in the subdirectory of parent_location
        based off the name of the Fetcher.

        :raises ValueError: if the location has no ``.git`` directory.
        """
        gitpth = Path(self.repo_location + '/.git')
        if not gitpth.is_dir():
            raise ValueError("Requested repo does not exist.")
        self.repo = Repo(self.repo_location)
        self.has_repo = True

    def repo_commit(self, msg):
        """Commit the staged files with message *msg* (UTF-8 encoded).

        :raises RuntimeError: if the fetcher has no repo.
        """
        if not self.has_repo:
            raise RuntimeError("Repo not initialized / loaded.")
        return self.repo.do_commit(msg.encode('UTF-8'))
class Repo(object):
    """
    An abstraction layer on top of dulwich.repo.Repo for higher-level
    git repository actions like:

    * adding only modified files
    * checking out whole trees (or paths within them) from refs
    * diffs with difflib
    * branching and tagging (both displaying and creating)
    * listing commits down from a ref

    Methods are structured to match git commands when appropriate.

    It also supports executing arbitrary git commands, if git is installed.
    Of course, everything else is implemented in pure python, so having git
    installed is optional.

    Should be considered a work-in-progress.
    """

    def __init__(self, path):
        self.repo = DulwichRepo(path)  # The inner Dulwich Repo object.
        self.root = path

    @classmethod
    def init(cls, path, mkdir=False, bare=False):
        """
        Initializes a normal or bare repository. This is mostly a
        handoff to Dulwich.

        :param path: the path (which must be a directory) to create
                     the repository within.
        :param mkdir: if True, make a directory at **path**. Equivalent
                      to ``mkdir [path] && cd [path] && git init``.
        :param bare: if True, create a bare repository at the path.

        :return: a ``Repo`` instance.
        """
        if bare:
            DulwichRepo.init_bare(path)
        else:
            DulwichRepo.init(path, mkdir)
        return cls(path)

    def _walk_files(self, top):
        """Yield the path of every file under **top**, skipping ``.git`` dirs."""
        for directory, dirnames, filenames in os.walk(top):
            if '.git' in dirnames:
                # pruning in place stops os.walk from descending into it
                dirnames.remove('.git')
            for filename in filenames:
                yield os.path.join(directory, filename)

    def add(self, path=None, all=False, add_new_files=True):
        """
        Add files to the repository or staging area if new or modified.
        Equivalent to the ``git add`` command.

        :param path: the path to the file to add, relative to the
                     repository root.
        :param all: if True, add all files under the given path. If
                    **path** is omitted, the repository's root path
                    will be used.
        :param add_new_files: if True, this command will also add new
                              files. Note this is the default behavior.
                              The option is provided for situations
                              (e.g. ``git commit -a``) where adding new
                              files would be undesirable.
        :return: list of filepaths (relative to the repository root)
                 that were added.

        If **path** is a file and **all** is True, only the single
        file will be added.
        If **path** is a directory and **all** is False, nothing will be
        added.
        Likewise, if both **path** and **all** are omitted, nothing will
        be added.

        Additionally, the ``add`` method checks to see if the path(s)
        have been modified. We don't want to create new blobs if we
        don't need them.
        """
        # The implementation creates a list of paths and stages them
        # using dulwich.Repo.stage.

        # Paths are a little tricky. To work with repositories independent
        # of the current working directory, we need absolute paths to files.
        # At the same time, git trees are relative to the repository root.
        # So, we have to do a few conversions.
        adds = []

        # get a root-anchored path for doing isfile/isdir checks.
        if path is not None:
            path = os.path.join(self.root, path)

        if path is not None and all:
            # add all files within the given path
            if os.path.isdir(path):
                adds.extend(self._walk_files(path))
            elif os.path.isfile(path):
                adds.append(path)
        elif path is None and all:
            # add all files within the root path
            adds.extend(self._walk_files(self.root))
        elif path is not None:
            # add file at path, but only if it really is a file
            if os.path.isfile(path):
                adds.append(path)

        # back to relative paths, so we can add them to the tree.
        rels = [os.path.relpath(p, self.root) for p in adds]

        # filter unmodified files (and untracked files if not add_new_files)
        if add_new_files:
            adds = [f for f in rels
                    if self._file_is_modified(f) or not self._file_in_tree(f)]
        else:
            adds = [f for f in rels if self._file_is_modified(f)]

        # don't waste time with stage if empty list.
        if adds:
            self.repo.stage(adds)

        return adds

    def branch(self, name=None, ref=None):
        """
        Create a new branch or display the current one. Equivalent to
        `git branch`.

        :param name: the name of the branch
        :param ref: a commit reference (branch, tag, or SHA). Same idea
                    as the git-branch ``--start-point`` option. Will
                    create the branch off of the commit. Defaults to HEAD.
        :return: None on create, branch name on display.

        When the name param is not given, the current branch will be
        returned as a string using the branch's full name
        (i.e. ``refs/heads/[branch_name]``).
        """
        # create a branch
        if name is not None:
            if ref is None:
                ref = self.head().id
            else:
                ref = self._resolve_ref(ref)
            self.repo.refs['refs/heads/%s' % name] = ref
        # display the name of the current branch
        else:
            # couldn't find an easy way to get it out of dulwich,
            # which resolves HEAD to the commit, so we'll just read
            # .git/HEAD directly.
            path = os.path.join(self.repo._controldir, 'HEAD')
            if os.path.isfile(path):
                with open(path, 'r') as fp:
                    # drop the first five characters (the "ref: " prefix)
                    return fp.read().strip()[5:]

    def checkout(self, ref, path=None):
        """
        Checkout the entire tree (or a subset) of a commit given a branch,
        tag, or commit SHA.

        This is a fairly naive implementation. It will just write the blob
        data recursively from the tree pointed at by the given reference,
        overwriting the working tree as necessary. It doesn't do deletions
        or renames.

        If you wanted to checkout 'HEAD':
          >>> repo.checkout(repo.head())

        If you wanted to checkout the master branch:
          >>> repo.checkout('master')

        If you wanted to checkout v1.2 (i.e. a tag):
          >>> repo.checkout('v1.2')

        :param ref: branch, tag, or commit
        :param path: checkout only file or directory at path, should be
                     relative to the repo's root.
        :raises KeyError: if bad reference.
        """
        sha = self._resolve_ref(ref)
        obj = self.repo[sha]
        tree = self.repo[obj.tree]

        if tree is None:
            raise KeyError('Bad reference: %s' % ref)
        if path is None:
            path = self.root
        else:
            # check if path and self.root are same
            if not os.path.samefile(path, self.root):
                # if not, we need the path's tree
                # (a sub-tree of the commit tree)
                tree = self._obj_from_tree(tree, path)
        # write the tree
        self._write_tree_to_wt(tree, path)

    def cmd(self, cmd):
        """
        Run a raw git command from the shell and return any output. Unlike
        other methods (which depend on Dulwich's git reimplementation and
        not git itself), this is dependent on the git shell command.

        The given git subcommand and arguments are prefixed with ``git`` and
        run through the subprocess module. To maintain the class's
        indifference to the current working directory, we also prepend the
        ``--git-dir`` and ``--work-tree`` arguments.

        :param cmd: A list of command-line arguments (anything the subprocess
                    module will take).
        :return: the command's captured standard output.
        :raises TypeError: if **cmd** is not a list.

        **Usage** (output has been truncated for brevity):
          >>> repo.cmd(['checkout', '-q', 'master'])
          >>> repo.cmd(['commit', '-q', '-a', '-m', 'Initial Commit'])
          >>> repo.cmd(['remote', '-v'])
          "origin  git@example.com:hopper.git (fetch)\\n\\n origin ..."
          >>> repo.cmd(['log'])
          "commit 68a116eaee458607a3a9cf852df4f358a02bdb92\\nAuthor: Ni..."

        As you can see, it doesn't do any parsing of the output. It's
        available for times when the other methods don't get the job done.
        """
        if not isinstance(cmd, list):
            raise TypeError('cmd must be a list')
        git_dir = os.path.join(self.root, '.git')
        prefix = ['git', '--git-dir', git_dir, '--work-tree', self.root]
        # It would be nice to use check_output() here, but it's 2.7+
        return subprocess.Popen(prefix + cmd,
                                stdout=subprocess.PIPE).communicate()[0]

    def commit(self, all=False, **kwargs):
        """
        Commit the changeset to the repository. Equivalent to the
        `git commit` command.

        This method does a commit; use the ``commits`` method to
        retrieve one or more commits.

        Uses ``dulwich.objects.BaseRepo.do_commit()``, see that for
        params. At minimum, you need to provide **committer** and
        **message**. Everything else will be defaulted.

        :param all: commit all modified files that are already being tracked.
        :param kwargs: the commit attributes (e.g. committer, message,
                       etc.). Again, see the underlying dulwich method.
        :return: the new Commit object (looked up by the id dulwich
                 returned).
        """
        if all:
            # add all changes (to already tracked files)
            self.add(all=True, add_new_files=False)

        # pass the kwargs to dulwich, get the returned commit id.
        commit_id = self.repo.do_commit(**kwargs)

        # return the Commit object (instead of the id, which is less useful).
        return self.repo[commit_id]

    def commits(self, ref=None, n=10):
        """
        Return up to n-commits down from a ref (branch, tag, commit),
        or if no ref given, down from the HEAD.

        If you just want a single commit, it may be cleaner to use the
        ``object`` method.

        :param ref: a branch, tag (not yet), or commit SHA to use
                    as a start point.
        :param n: the maximum number of commits to return. If fewer
                  matching commits exist, only they will be returned.

        :return: a list of ``dulwich.objects.Commit`` objects.

        **Usage**:
          >>> repo.commits()
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>,
           <Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
          >>> repo.commits()[0] == repo.head()
          True
          >>> repo.commits(n=1)
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>]
          >>> repo.commits('6336f47615da32d520a8d52223b9817ee50ca728', n=1)
          [<Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
        """
        start_point = self.head().id
        if ref is not None:
            start_point = self._resolve_ref(ref)
        return self.repo.revision_history(start_point)[:n]

    def diff(self, a, b=None, path=None):
        """
        Return a diff of commits a and b.

        :param a: a commit identifier.
        :param b: a commit identifier. When omitted, **a** is compared
                  against the file in the working tree.
        :param path: a path to a file to diff, relative to the repo
                     root. Diffing a directory or the entire tree is not
                     implemented yet.
        :raises NotImplementedError: if **path** is omitted or does not
                                     point to a file.
        """
        if path is None or \
                not os.path.isfile(os.path.join(self.root, path)):
            raise NotImplementedError('Specify a file path for now')
        return self._diff_file(path, a, b)

    def head(self):
        """Return the HEAD commit or raise an error."""
        # It seems best to make this a function so we don't have to
        # set and continually update it.
        try:
            return self.repo['HEAD']
        except KeyError:
            # The HEAD will be missing before the repo is committed to.
            raise NoHeadSet

    def is_dirty(self):
        """Return True if there are uncommitted changes to the repository."""
        new, modified, deleted = self.status()
        return bool(new or modified or deleted)

    def object(self, sha):
        """
        Retrieve an object from the repository.

        :param sha: the 40-byte hex-rep of the object's SHA1 identifier.
        """
        return self.repo[sha]

    def status(self, from_path=None):
        """
        Compare the working directory with HEAD.

        :param from_path: show changes within this path, which must be a
                          file or directory relative to the repo.
        :return: a tuple containing three lists: new, modified, deleted.
                 All entries are paths relative to the repository root.
        :raises OSError: if **from_path** does not exist.
        """
        # TODO: also compare the index and HEAD, or the index and WT

        # use from_path if set, otherwise root.
        if from_path is not None:
            from_path = os.path.join(self.root, from_path)
            if not os.path.exists(from_path):
                raise OSError('from_path does not exist.')
            path = from_path
        else:
            path = self.root

        new, modified, deleted = [], [], []
        buckets = {
            FILE_IS_NEW: new,
            FILE_IS_MODIFIED: modified,
            FILE_IS_DELETED: deleted,
        }

        if os.path.isfile(path):
            candidates = [path]
        elif os.path.isdir(path):
            candidates = self._walk_files(path)
        else:
            candidates = []

        for candidate in candidates:
            # _file_status (and the tree lookups behind it) expect a path
            # relative to the repo root, never the root-joined one.
            fpath = os.path.relpath(candidate, self.root)
            bucket = buckets.get(self._file_status(fpath))
            if bucket is not None:
                bucket.append(fpath)

        return new, modified, deleted

    def tag(self, name, ref=None):
        """
        Create a tag.

        :param name: name of the new tag (e.g. 'v1.0' or '1.0.6')
        :param ref: a commit ref to tag, defaults to HEAD.
        """
        # TODO: display tags attached to HEAD when no args.
        if ref is None:
            ref = self.head().id
        ref = self._resolve_ref(ref)
        self.repo.refs['refs/tags/%s' % name] = ref

    def tree(self, sha=None):
        """
        Return the tree with given SHA, or if no SHA given, return the
        HEAD commit's tree. Raise an error if an object matches the SHA,
        but is not a tree.

        :param sha: tree reference.

        Note that a commit reference would not work. To get a commit's
        tree, just provide ``c.tree``, which contains the SHA we need.
        """
        if sha is None:
            obj = self.repo[self.head().tree]
        else:
            obj = self.repo[sha]
        if isinstance(obj, Tree):
            return obj
        raise NotTreeError('Object is not a Tree')

    def _file_status(self, path, ref=None):
        """
        Checks the status of a file in the working tree relative to a
        commit (usually HEAD). Statuses include: new, modified, and deleted.

        These statuses are conveyed as constants::

          FILE_IS_UNCHANGED = 0
          FILE_IS_NEW       = 1
          FILE_IS_MODIFIED  = 2
          FILE_IS_DELETED   = 3

        :param path: file path relative to the repo
        :param ref: currently ignored; the comparison is always against
                    HEAD.
        :return: status constant
        :raises KeyError: if the path is in neither the working tree nor
                          the HEAD tree.
        """
        full_path = os.path.join(self.root, path)
        in_work_tree = os.path.exists(full_path)
        in_tree = self._file_in_tree(path)

        # new
        if not in_tree and in_work_tree:
            return FILE_IS_NEW
        # deleted
        elif in_tree and not in_work_tree:
            return FILE_IS_DELETED
        # modified
        elif in_tree and in_work_tree and self._file_is_modified(path):
            return FILE_IS_MODIFIED
        # unchanged
        elif in_tree and in_work_tree:
            return FILE_IS_UNCHANGED
        # does not exist (at least in our 2-tree world)
        else:
            raise KeyError('Path not found in either tree.')

    def _file_is_modified(self, path, ref=None):
        """
        Returns True if the current file (in the WT) has been modified from
        the blob in the commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: currently ignored; the comparison is always against
                    HEAD.

        This returns False for new files (not present in the tree). If this
        is unexpected, just call ``_file_in_tree`` first.

        It assumes that the given path does exist. Just expect an OSError
        if it doesn't.
        """
        # handle no head scenario when this gets called before first commit
        try:
            head = self.head()
        except NoHeadSet:
            return False

        # get the tree, then the blob from the tree
        tree = self.repo[head.tree]
        blob1 = self._obj_from_tree(tree, path)
        if not isinstance(blob1, Blob):
            return False

        # make a second blob from the current file; read raw bytes so the
        # hash is not skewed by text-mode newline/encoding translation
        with open(os.path.join(self.root, path), 'rb') as fp:
            blob2 = Blob.from_string(fp.read())

        # are the two blobs equivalent?
        # if their contents are the same they should be...
        # calls dulwich.objects.ShaFile.__eq__, which just compares SHAs
        return blob1 != blob2

    def _file_in_tree(self, path, ref=None):
        """
        Returns True if the file corresponds to a blob in the HEAD
        commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: currently ignored; the lookup is always against HEAD.
        """
        # handle no head scenario when this gets called before first commit
        try:
            head = self.head()
        except NoHeadSet:
            return False

        tree = self.repo[head.tree]
        return self._obj_from_tree(tree, path) is not None

    def _apply_to_tree(self, tree, f, path=None):
        """
        Walk a tree recursively and apply function, f, to each entry

        :param tree: a dulwich.objects.Tree object
        :param f: function that will be called with each entry.
        :param path: if provided, the path relative to the repository
                     will be included in the function call.
        :raises NotTreeError: if **tree** is not a Tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError
        for entry in tree.iteritems():
            f(entry, path) if path else f(entry)
            obj = self.repo[entry.sha]
            if isinstance(obj, Tree):
                # descend with the sub-tree's own name appended
                new_path = os.path.join(path, entry.path) if path else None
                self._apply_to_tree(obj, f, new_path)

    def _obj_from_tree(self, tree, path):
        """
        Walk a tree recursively to retrieve and return a blob or sub-tree
        from the given path, or return None if one does not exist.

        :param tree: a dulwich.objects.Tree object.
        :param path: path relative to the repository root.

        :return: Tree object, Blob object, or None if the path could
                 not be found.
        :raises NotTreeError: if **tree** is not a Tree.

        For example, providing ``hopper/git.py`` would return the
        ``git.py`` blob within the ``hopper`` sub-tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError('Object is not a tree')

        # remove a trailing slash from path (so the last component is
        # never the empty string)
        if path.endswith(os.sep):
            path = path[:-1]

        # head is the first path component (the file itself or a
        # directory); remainder is everything after it, for recursion.
        head, _, remainder = path.partition(os.sep)

        for entry in tree.iteritems():
            # these are dulwich.objects.TreeEntry objects
            if entry.path == head:
                # get the Tree or Blob.
                obj = self.repo[entry.sha]
                # return if we're at the right path
                if head == path:
                    return obj
                # otherwise recurse if it's a Tree
                elif isinstance(obj, Tree):
                    return self._obj_from_tree(obj, remainder)

        # if we get here the path wasn't there.
        return None

    def _write_tree_to_wt(self, tree, basepath):
        """
        Walk a tree recursively and write each blob's data to the working
        tree.

        :param tree: a dulwich.objects.Tree object.
        :param basepath: blob data is written to:
                         ``os.path.join(basepath, blob_path)``.
                         Recursive calls will append the sub-tree
                         name to the original call.
        :raises NotTreeError: if **tree** is not a Tree.
        """
        if not isinstance(tree, Tree):
            raise NotTreeError('Object is not a tree')
        # sub-trees may not exist in the working tree yet
        if not os.path.isdir(basepath):
            os.makedirs(basepath)
        for entry in tree.iteritems():
            obj = self.repo[entry.sha]
            target = os.path.join(basepath, entry.path)
            if isinstance(obj, Blob):
                with open(target, 'wb') as fp:
                    fp.write(obj.data)
            elif isinstance(obj, Tree):
                self._write_tree_to_wt(obj, target)

    def _resolve_ref(self, ref):
        """
        Resolve a reference to a commit SHA.

        :param ref: branch, tag, commit reference.
        :return: a commit SHA.
        :raises KeyError: if ref doesn't point to a commit.
        :raises TypeError: if ref is not a string.
        """
        # order: branch -> tag -> commit
        # (tag and branch can have same name, git assumes branch)
        if not isinstance(ref, str):
            raise TypeError('ref must be a string')

        # dulwich.Repo.refs keys the full name
        # (i.e. 'refs/heads/master') for branches and tags
        branch = _expand_branch_name(ref)
        tag = _expand_tag_name(ref)

        # branch?
        if branch in self.repo.refs:
            # get the commit SHA that the branch points to
            return self.repo[branch].id
        # tag?
        elif tag in self.repo.refs:
            return self.repo[tag].id
        # commit?
        else:
            obj = self.repo[ref]
            if isinstance(obj, Commit):
                return obj.id
            raise KeyError('Bad reference: %s' % ref)

    def _diff_file(self, path, a, b=None, html=False):
        """
        Use difflib to compare a file between two commits, or a
        single commit and the working tree.

        :param a: ref to commit a.
        :param b: ref to commit b, defaults to the working tree.
        :param path: path to file, relative to repo root.
        :param html: currently unused.
        :raise NotBlobError: if **b** is given and the path is not a blob
                             in either commit's tree.
        """
        # resolve commit a, get its tree and the blob at path
        a = self._resolve_ref(a)
        tree1 = self.repo[self.repo[a].tree]
        blob1 = self._obj_from_tree(tree1, path)
        # set data or empty string (meaning no blob at path)
        data1 = blob1.data if isinstance(blob1, Blob) else ''

        if b is None:
            # compare against the working tree; there is no second blob
            with open(os.path.join(self.root, path), 'r') as fp:
                data2 = fp.read()
        else:
            b = self._resolve_ref(b)
            tree2 = self.repo[self.repo[b].tree]
            blob2 = self._obj_from_tree(tree2, path)
            data2 = blob2.data if isinstance(blob2, Blob) else ''
            # if both blobs were missing => bad path
            if not isinstance(blob1, Blob) and not isinstance(blob2, Blob):
                raise NotBlobError(
                    'Path did not point to a blob in either tree')

        diff = list(
            difflib.context_diff(data1.splitlines(), data2.splitlines()))
        return '\n'.join(diff)
class GitRepo(object):
    """
    Wrapper around a repository at a given path that can export blobs and
    trees of a commit to disk, compute git-style hashes of local files and
    directories, and stage/commit files.

    Paths of temporary files and directories created by the ``get_*``
    methods are recorded in ``self.temp_persist_files``.
    """

    def __init__(self, path):
        """
        Open the repository at **path**, initializing a new one if none
        exists there yet.

        :raises IOError: if **path** exists but is not a directory.
        """
        if os.path.exists(path):
            if not os.path.isdir(path):
                raise IOError('Git repository "%s" must be a directory.' %
                              path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist; create the directory too if needed
            self.repo = Repo.init(path, not os.path.exists(path))
        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit for **version**; raise NotCommitError otherwise."""
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        """
        Return ``"tree"`` or ``"blob"`` for the top-level entry **name** in
        the tree of commit **version**.

        :raises KeyError: if **name** is not in the commit's tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        # tree[name] is indexed as (mode, sha) here
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        else:
            return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        """
        Export **name** from commit **version**, dispatching to ``get_dir``
        or ``get_file`` depending on its type.

        :param path_type: ``'tree'`` or ``'blob'``; looked up via
            ``get_type`` when None.
        :return: the path the entry was written to.
        :raises TypeError: if the path type is neither tree nor blob.
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)
        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """
        Write the blob **blob_sha** to **out_fname** (creating parent
        directories), or to a new temporary file when no name is given.

        :return: the path written to.
        """
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None,
                 out_suffix=''):
        """
        Write the blob **name** of commit **version** to disk.

        :return: the path written to.
        :raises KeyError: if **name** is not in the commit's tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        out_fname = self._write_blob(blob_sha, out_fname, out_suffix)
        return out_fname

    def get_dir(self, name, version="HEAD", out_dirname=None,
                out_suffix=''):
        """
        Write the contents of the sub-tree **name** of commit **version**
        into **out_dirname**, or into a new temporary directory when no
        name is given.

        :return: the directory written to.
        :raises KeyError: if **name** is not in the commit's tree.
        """
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        # subtree = self.repo.tree(subtree_id)
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """
        Return the SHA stored for the top-level entry **name** in commit
        **version**. ``path_type`` is accepted but not used.

        :raises KeyError: if **name** is not in the commit's tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1<<16):
        """
        Hash the file **fname** as a git blob (object header followed by
        the file's bytes), reading it **chunk_size** bytes at a time.
        """
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            def read_chunk():
                return f.read(chunk_size)
            # The file is opened in binary mode, so end-of-file is b'';
            # a '' sentinel never matches bytes on Python 3.
            my_iter = chain([head], iter(read_chunk, b''))
            return iter_sha1(my_iter)

    @staticmethod
    def compute_tree_hash(dirname):
        """
        Build a Tree from the sorted entries of **dirname** (recursing into
        sub-directories, hashing regular files) and return its id.
        Entries that are neither a directory nor a regular file are skipped.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR  # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """
        Return the tree hash of a directory or the blob hash of a file.

        :raises TypeError: if **path** is neither a directory nor a file.
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit the walker yields for **path**."""
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head],
                        max_entries=1, paths=[path])
        # next() builtin works on both Python 2.6+ and Python 3
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """
        Stage **filename** (relative to the repo path); directories are
        staged recursively and symbolic links are skipped with a warning.
        """
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # stage with '/' separators regardless of the platform's
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage **filename** and commit it; return the commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """
        Make sure ``user.name`` and ``user.email`` are configured; if either
        lookup raises KeyError, write defaults derived from the current
        user into the repository configuration.
        """
        config_stack = self.repo.get_config_stack()

        try:
            config_stack.get(('user',), 'name')
            config_stack.get(('user',), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user',), 'name', user)
            repo_conf.set(('user',), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
class Gittle(object): """All paths used in Gittle external methods must be paths relative to the git repository """ DEFAULT_COMMIT = 'HEAD' DEFAULT_BRANCH = 'master' DEFAULT_REMOTE = 'origin' DEFAULT_MESSAGE = '**No Message**' DEFAULT_USER_INFO = { 'name': None, 'email': None, } DIFF_FUNCTIONS = { 'classic': utils.git.classic_tree_diff, 'dict': utils.git.dict_tree_diff, 'changes': utils.git.dict_tree_diff } DEFAULT_DIFF_TYPE = 'dict' HIDDEN_REGEXES = [ # Hide git directory r'.*\/\.git\/.*', ] # References REFS_BRANCHES = 'refs/heads/' REFS_REMOTES = 'refs/remotes/' REFS_TAGS = 'refs/tags/' # Name pattern truths # Used for detecting if files are : # - deleted # - added # - changed PATTERN_ADDED = (False, True) PATTERN_REMOVED = (True, False) PATTERN_MODIFIED = (True, True) # Permissions MODE_DIRECTORY = 0o40000 # Used to tell if a tree entry is a directory # Tree depth MAX_TREE_DEPTH = 1000 # Acceptable Root paths ROOT_PATHS = (os.path.curdir, os.path.sep) def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs): if isinstance(repo_or_path, DulwichRepo): self.repo = repo_or_path elif isinstance(repo_or_path, Gittle): self.repo = DulwichRepo(repo_or_path.path) elif isinstance(repo_or_path, basestring): path = os.path.abspath(repo_or_path) self.repo = DulwichRepo(path) else: logging.warning('Repo is of type %s' % type(repo_or_path)) raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path') # Set path self.path = self.repo.path # The remote url self.origin_uri = origin_uri # Report client activty self._report_activity = report_activity # Build ignore filter self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES) self.hidden_regexes.extend(self._get_ignore_regexes()) self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes) self.filters = [ self.ignore_filter, ] # Get authenticator if auth: self.authenticator = auth else: self.auth(*args, **kwargs) def 
report_activity(self, *args, **kwargs): if not self._report_activity: return return self._report_activity(*args, **kwargs) def _format_author(self, name, email): return "%s <%s>" % (name, email) def _format_userinfo(self, userinfo): name = userinfo.get('name') email = userinfo.get('email') if name and email: return self._format_author(name, email) return None def _format_ref(self, base, extra): return ''.join([base, extra]) def _format_ref_branch(self, branch_name): return self._format_ref(self.REFS_BRANCHES, branch_name) def _format_ref_remote(self, remote_name): return self._format_ref(self.REFS_REMOTES, remote_name) def _format_ref_tag(self, tag_name): return self._format_ref(self.REFS_TAGS, tag_name) @property def head(self): """Return SHA of the current HEAD """ return self.repo.head() @property def is_bare(self): """Bare repositories have no working directories or indexes """ return self.repo.bare @property def is_working(self): return not(self.is_bare) def has_index(self): """Opposite of is_bare """ return self.repo.has_index() @property def has_commits(self): """ If the repository has no HEAD we consider that is has no commits """ try: self.repo.head() except KeyError: return False return True def ref_walker(self, ref=None): """ Very simple, basic walker """ ref = ref or 'HEAD' sha = self._commit_sha(ref) for entry in self.repo.get_walker(sha): yield entry.commit def branch_walker(self, branch): branch = branch or self.active_branch ref = self._format_ref_branch(branch) return self.ref_walker(ref) def commit_info(self, start=0, end=None, branch=None): """Return a generator of commits with all their attached information """ if not self.has_commits: return [] commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)] if not end: return commits return commits[start:end] @funky.uniquify def recent_contributors(self, n=None, branch=None): n = n or 10 return funky.pluck(self.commit_info(end=n, branch=branch), 'author') @property def 
commit_count(self): try: return len(self.ref_walker()) except KeyError: return 0 def commits(self): """Return a list of SHAs for all the concerned commits """ return [commit['sha'] for commit in self.commit_info()] @property def git_dir(self): return self.repo.controldir() def auth(self, *args, **kwargs): self.authenticator = GittleAuth(*args, **kwargs) return self.authenticator # Generate a branch selector (used for pushing) def _wants_branch(self, branch_name=None): branch_name = branch_name or self.active_branch refs_key = self._format_ref_branch(branch_name) sha = self.branches[branch_name] def wants_func(old): refs_key = self._format_ref_branch(branch_name) return { refs_key: sha } return wants_func def _get_ignore_regexes(self): gitignore_filename = os.path.join(self.path, '.gitignore') if not os.path.exists(gitignore_filename): return [] lines = open(gitignore_filename).readlines() globers = map(lambda line: line.rstrip(), lines) return utils.paths.globers_to_regex(globers) # Get the absolute path for a file in the git repo def abspath(self, repo_file): return os.path.abspath( os.path.join(self.path, repo_file) ) # Get the relative path from the absolute path def relpath(self, abspath): return os.path.relpath(abspath, self.path) @property def last_commit(self): return self[self.repo.head()] @property def index(self): return self.repo.open_index() @classmethod def init(cls, path, bare=None, *args, **kwargs): """Initialize a repository""" mkdir_safe(path) # Constructor to use if bare: constructor = DulwichRepo.init_bare else: constructor = DulwichRepo.init # Create dulwich repo repo = constructor(path) # Create Gittle repo return cls(repo, *args, **kwargs) @classmethod def init_bare(cls, *args, **kwargs): kwargs.setdefault('bare', True) return cls.init(*args, **kwargs) @classmethod def is_repo(cls, path): """Returns True if path is a git repository, False if it is not""" try: repo = Gittle(path) except NotGitRepository: return False else: return True def 
get_client(self, origin_uri=None, **kwargs): # Get the remote URL origin_uri = origin_uri or self.origin_uri # Fail if inexistant if not origin_uri: raise InvalidRemoteUrl() client_kwargs = {} auth_kwargs = self.authenticator.kwargs() client_kwargs.update(auth_kwargs) client_kwargs.update(kwargs) client_kwargs.update({ 'report_activity': self.report_activity }) client, remote_path = get_transport_and_path(origin_uri, **client_kwargs) return client, remote_path def push_to(self, origin_uri, branch_name=None, progress=None): selector = self._wants_branch(branch_name=branch_name) client, remote_path = self.get_client(origin_uri) return client.send_pack( remote_path, selector, self.repo.object_store.generate_pack_contents, progress=progress ) # Like: git push def push(self, origin_uri=None, branch_name=None, progress=None): return self.push_to(origin_uri, branch_name, progress) # Not recommended at ALL ... !!! def dirty_pull_from(self, origin_uri, branch_name=None): # Remove all previously existing data rmtree(self.path) mkdir_safe(self.path) self.repo = DulwichRepo.init(self.path) # Fetch brand new copy from remote return self.pull_from(origin_uri, branch_name) def pull_from(self, origin_uri, branch_name=None): return self.fetch(origin_uri) # Like: git pull def pull(self, origin_uri=None, branch_name=None): return self.pull_from(origin_uri, branch_name) def fetch_remote(self, origin_uri=None): # Get client client, remote_path = self.get_client(origin_uri=origin_uri) # Fetch data from remote repository remote_refs = client.fetch(remote_path, self.repo) return remote_refs def _setup_fetched_refs(self, refs, origin, bare): remote_tags = utils.git.subrefs(refs, 'refs/tags') remote_heads = utils.git.subrefs(refs, 'refs/heads') # Filter refs clean_remote_tags = utils.git.clean_refs(remote_tags) clean_remote_heads = utils.git.clean_refs(remote_heads) # Base of new refs heads_base = 'refs/remotes/' + origin if bare: heads_base = 'refs/heads' # Import branches 
self.import_refs( heads_base, clean_remote_heads ) # Import tags self.import_refs( 'refs/tags', clean_remote_tags ) # Update HEAD for k, v in utils.git.clean_refs(refs).items(): self[k] = v def fetch(self, origin_uri=None, bare=None, origin=None): bare = bare or False origin = origin or self.DEFAULT_REMOTE # Remote refs remote_refs = self.fetch_remote(origin_uri) # Update head # Hit repo because head doesn't yet exist so # print("REFS = %s" % remote_refs) # If no refs (empty repository() if not remote_refs: return # Update refs (branches, tags, HEAD) self._setup_fetched_refs(remote_refs, origin, bare) # Checkout working directories if not bare and self.has_commits: self.checkout_all() else: self.update_server_info() @classmethod def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs): """Clone a remote repository""" mkdir_safe(local_path) # Initialize the local repository if bare: local_repo = cls.init_bare(local_path) else: local_repo = cls.init(local_path) repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs) repo.fetch(bare=bare) # Add origin repo.add_remote('origin', origin_uri) return repo @classmethod def clone_bare(cls, *args, **kwargs): """Same as .clone except clones to a bare repository by default """ kwargs.setdefault('bare', True) return cls.clone(*args, **kwargs) def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs): if not tree: # If no tree then stage files modified_files = files or self.modified_files logging.info("STAGING : %s" % modified_files) self.repo.stage(modified_files) # Messages message = message or self.DEFAULT_MESSAGE author_msg = self._format_userinfo(author) committer_msg = self._format_userinfo(committer) return self.repo.do_commit( message=message, author=author_msg, committer=committer_msg, encoding='UTF-8', tree=tree, *args, **kwargs ) def _tree_from_structure(self, structure): # TODO : Support directories tree = Tree() for 
file_info in structure: # str only try: data = file_info['data'].encode('ascii') name = file_info['name'].encode('ascii') mode = file_info['mode'] except: # Skip file on encoding errors continue blob = Blob() blob.data = data # Store file's contents self.repo.object_store.add_object(blob) # Add blob entry tree.add( name, mode, blob.id ) # Store tree self.repo.object_store.add_object(tree) return tree.id # Like: git commmit -a def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs): user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, files=files, *args, **kwargs ) def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs): """Main use is to do commits directly to bare repositories For example doing a first Initial Commit so the repo can be cloned and worked on right away """ if not structure: return tree = self._tree_from_structure(structure) user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, tree=tree, *args, **kwargs ) # Push all local commits # and pull all remote commits def sync(self, origin_uri=None): self.push(origin_uri) return self.pull(origin_uri) def lookup_entry(self, relpath, trackable_files=set()): if not relpath in trackable_files: raise KeyError abspath = self.abspath(relpath) with open(abspath, 'rb') as git_file: data = git_file.read() s = sha1() s.update("blob %u\0" % len(data)) s.update(data) return (s.hexdigest(), os.stat(abspath).st_mode) @property @funky.transform(set) def tracked_files(self): return list(self.index) @property @funky.transform(set) def raw_files(self): return utils.paths.subpaths(self.path) @property @funky.transform(set) def ignored_files(self): return utils.paths.subpaths(self.path, filters=self.filters) @property @funky.transform(set) def trackable_files(self): return self.raw_files - self.ignored_files @property 
@funky.transform(set) def untracked_files(self): return self.trackable_files - self.tracked_files """ @property @funky.transform(set) def modified_staged_files(self): "Checks if the file has changed since last commit" timestamp = self.last_commit.commit_time index = self.index return [ f for f in self.tracked_files if index[f][1][0] > timestamp ] """ # Return a list of tuples # representing the changed elements in the git tree def _changed_entries(self, ref=None): ref = ref or self.DEFAULT_COMMIT if not self.has_commits: return [] obj_sto = self.repo.object_store tree_id = self[ref].tree names = self.trackable_files lookup_func = partial(self.lookup_entry, trackable_files=names) # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...] tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False) return list(tree_diff) @funky.transform(set) def _changed_entries_by_pattern(self, pattern): changed_entries = self._changed_entries() filtered_paths = None #if the pattern is PATTERN_MODIFIED, should check the sha if self.PATTERN_MODIFIED == pattern: filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) and sha[0] == sha[1] ] else : filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) ] return filtered_paths @property @funky.transform(set) def removed_files(self): return self._changed_entries_by_pattern(self.PATTERN_REMOVED) - self.ignored_files @property @funky.transform(set) def added_files(self): return self._changed_entries_by_pattern(self.PATTERN_ADDED) - self.ignored_files @property @funky.transform(set) def modified_files(self): modified_files = self._changed_entries_by_pattern(self.PATTERN_MODIFIED) - self.ignored_files return modified_files @property @funky.transform(set) def modified_unstaged_files(self): timestamp = 
self.last_commit.commit_time return [ f for f in self.tracked_files if os.stat(self.abspath(f)).st_mtime > timestamp ] @property def pending_files(self): """ Returns a list of all files that could be possibly staged """ # Union of both return self.modified_files | self.added_files | self.removed_files @property def pending_files_by_state(self): files = { 'modified': self.modified_files, 'added': self.added_files, 'removed': self.removed_files } # "Flip" the dictionary return { path: state for state, paths in files.items() for path in paths } """ @property @funky.transform(set) def modified_files(self): return self.modified_staged_files | self.modified_unstaged_files """ # Like: git add @funky.arglist_method def stage(self, files): return self.repo.stage(files) def add(self, *args, **kwargs): return self.stage(*args, **kwargs) # Like: git rm @funky.arglist_method def rm(self, files, force=False): index = self.index index_files = filter(lambda f: f in index, files) for f in index_files: del self.index[f] return index.write() def mv_fs(self, file_pair): old_name, new_name = file_pair os.rename(old_name, new_name) # Like: git mv @funky.arglist_method def mv(self, files_pair): index = self.index files_in_index = filter(lambda f: f[0] in index, files_pair) map(self.mv_fs, files_in_index) old_files = map(funky.first, files_in_index) new_files = map(funky.last, files_in_index) self.add(new_files) self.rm(old_files) self.add(old_files) return @working_only def _checkout_tree(self, tree): return build_index_from_tree( self.repo.path, self.repo.index_path(), self.repo.object_store, tree ) def checkout_all(self, commit_sha=None): commit_sha = commit_sha or self.head commit_tree = self._commit_tree(commit_sha) # Rebuild index from the current tree return self._checkout_tree(commit_tree) def checkout(self, ref): """Checkout a given ref or SHA """ self.repo.refs.set_symbolic_ref('HEAD', ref) commit_tree = self._commit_tree(ref) # Rebuild index from the current tree return 
self._checkout_tree(commit_tree) @funky.arglist_method def reset(self, files, commit='HEAD'): pass def rm_all(self): # if we go at the index via the property, it is reconstructed # each time and therefore clear() doesn't have the desired effect, # therefore, we cache it in a variable and use that. i = self.index i.clear() return i.write() def _to_commit(self, commit_obj): """Allows methods to accept both SHA's or dulwich Commit objects as arguments """ if isinstance(commit_obj, basestring): return self.repo[commit_obj] return commit_obj def _commit_sha(self, commit_obj): """Extracts a Dulwich commits SHA """ if utils.git.is_sha(commit_obj): return commit_obj elif isinstance(commit_obj, basestring): # Can't use self[commit_obj] to avoid infinite recursion commit_obj = self.repo[self.dwim_reference(commit_obj)] return commit_obj.id def dwim_reference(self, ref): """Dwim resolves a short reference to a full reference """ # Formats of refs we want to try in order formats = [ "%s", "refs/%s", "refs/tags/%s", "refs/heads/%s", "refs/remotes/%s", "refs/remotes/%s/HEAD", ] for f in formats: try: fullref = f % ref if not fullref in self.repo: continue return fullref except: continue raise Exception("Could not resolve ref") def blob_data(self, sha): """Return a blobs content for a given SHA """ return self[sha].data # Get the nth parent back for a given commit def get_parent_commit(self, commit, n=None): """ Recursively gets the nth parent for a given commit Warning: Remember that parents aren't the previous commits """ if n is None: n = 1 commit = self._to_commit(commit) parents = commit.parents if n <= 0 or not parents: # Return a SHA return self._commit_sha(commit) parent_sha = parents[0] parent = self[parent_sha] # Recur return self.get_parent_commit(parent, n - 1) def get_previous_commit(self, commit_ref, n=None): commit_sha = self._parse_reference(commit_ref) n = n or 1 commits = self.commits() return funky.next(commits, commit_sha, n=n, default=commit_sha) def 
_parse_reference(self, ref_string): # COMMIT_REF~x if '~' in ref_string: ref, count = ref_string.split('~') count = int(count) commit_sha = self._commit_sha(ref) return self.get_previous_commit(commit_sha, count) return self._commit_sha(ref_string) def _commit_tree(self, commit_sha): """Return the tree object for a given commit """ return self[commit_sha].tree def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True): diff_type = diff_type or self.DEFAULT_DIFF_TYPE diff_func = self.DIFF_FUNCTIONS[diff_type] if not compare_to: compare_to = self.get_previous_commit(commit_sha) return self._diff_between(compare_to, commit_sha, diff_function=diff_func) def diff_working(self, ref=None, filter_binary=True): """Diff between the current working directory and the HEAD """ return utils.git.diff_changes_paths( self.repo.object_store, self.path, self._changed_entries(ref=ref), filter_binary=filter_binary ) def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None): """Returns a dict of the following Format : { "directory/filename.txt": { 'name': 'filename.txt', 'path': "directory/filename.txt", "sha": "xxxxxxxxxxxxxxxxxxxx", "data": "blablabla", "mode": 0xxxxx", }, ... 
} """ # Default values context = {} is_tree = is_tree or False parent_path = parent_path or '' if is_tree: tree = self[commit_sha] else: tree = self[self._commit_tree(commit_sha)] for entry in tree.items(): # Check if entry is a directory if entry.mode == self.MODE_DIRECTORY: context.update( self.get_commit_files(entry.sha, parent_path=os.path.join(parent_path, entry.path), is_tree=True, paths=paths) ) continue subpath = os.path.join(parent_path, entry.path) # Only add the files we want if not(paths is None or subpath in paths): continue # Add file entry context[subpath] = { 'name': entry.path, 'path': subpath, 'mode': entry.mode, 'sha': entry.sha, 'data': self.blob_data(entry.sha), } return context def file_versions(self, path): """Returns all commits where given file was modified """ versions = [] commits_info = self.commit_info() seen_shas = set() for commit in commits_info: try: files = self.get_commit_files(commit['sha'], paths=[path]) file_path, file_data = files.items()[0] except IndexError: continue file_sha = file_data['sha'] if file_sha in seen_shas: continue else: seen_shas.add(file_sha) # Add file info commit['file'] = file_data versions.append(file_data) return versions def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True): """Internal method for getting a diff between two commits Please use .diff method unless you have very specific needs """ # If commit is first commit (new_commit_sha == old_commit_sha) # then compare to an empty tree if new_commit_sha == old_commit_sha: old_tree = Tree() else: old_tree = self._commit_tree(old_commit_sha) new_tree = self._commit_tree(new_commit_sha) return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary) def changes(self, *args, **kwargs): """ List of changes between two SHAs Returns a list of lists of tuples : [ [ (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) ], ... 
] """ kwargs['diff_type'] = 'changes' return self.diff(*args, **kwargs) def changes_count(self, *args, **kwargs): return len(self.changes(*args, **kwargs)) def _refs_by_pattern(self, pattern): refs = self.refs def item_filter(key_value): """Filter only concered refs""" key, value = key_value return key.startswith(pattern) def item_map(key_value): """Rewrite keys""" key, value = key_value new_key = key[len(pattern):] return (new_key, value) return dict( map(item_map, filter( item_filter, refs.items() ) ) ) @property def refs(self): return self.repo.get_refs() def set_refs(refs_dict): for k, v in refs_dict.items(): self.repo[k] = v def import_refs(self, base, other): return self.repo.refs.import_refs(base, other) @property def branches(self): return self._refs_by_pattern(self.REFS_BRANCHES) @property def active_branch(self): """Returns the name of the active branch, or None, if HEAD is detached """ x = self.repo.refs.read_ref('HEAD') if not x.startswith(SYMREF): return None else: symref = x[len(SYMREF):] if not symref.startswith(self.REFS_BRANCHES): return None else: return symref[len(self.REFS_BRANCHES):] @property def active_sha(self): """Deprecated equivalent to head property """ return self.head @property def remote_branches(self): return self._refs_by_pattern(self.REFS_REMOTES) @property def tags(self): return self._refs_by_pattern(self.REFS_TAGS) @property def remotes(self): """ Dict of remotes { 'origin': 'http://friendco.de/some_user/repo.git', ... 
} """ config = self.repo.get_config() return { keys[1]: values['url'] for keys, values in config.items() if keys[0] == 'remote' } def add_remote(self, remote_name, remote_url): # Get repo's config config = self.repo.get_config() # Add new entries for remote config.set(('remote', remote_name), 'url', remote_url) config.set(('remote', remote_name), 'fetch', "+refs/heads/*:refs/remotes/%s/*" % remote_name) # Write to disk config.write_to_path() return remote_name def add_ref(self, new_ref, old_ref): self.repo.refs[new_ref] = old_ref self.update_server_info() def remove_ref(self, ref_name): # Returns False if ref doesn't exist if not ref_name in self.repo.refs: return False del self.repo.refs[ref_name] self.update_server_info() return True def create_branch(self, base_branch, new_branch, tracking=None): """Try creating a new branch which tracks the given remote if such a branch does not exist then branch off a local branch """ # The remote to track tracking = self.DEFAULT_REMOTE # Already exists if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) # Get information about remote_branch remote_branch = os.path.sep.join([tracking, base_branch]) # Fork Local if base_branch in self.branches: base_ref = self._format_ref_branch(base_branch) # Fork remote elif remote_branch in self.remote_branches: base_ref = self._format_ref_remote(remote_branch) # TODO : track else: raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)) # Reference of new branch new_ref = self._format_ref_branch(new_branch) # Copy reference to create branch self.add_ref(new_ref, base_ref) return new_ref def create_orphan_branch(self, new_branch, empty_index=None): """ Create a new branch with no commits in it. Technically, just points HEAD to a non-existent branch. The actual branch will only be created if something is committed. 
This is equivalent to: git checkout --orphan <new_branch>, Unless empty_index is set to True, in which case the index will be emptied along with the file-tree (which is always emptied). Against a clean working tree, this is equivalent to: git checkout --orphan <new_branch> git reset --merge """ if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) new_ref = self._format_ref_branch(new_branch) self.repo.refs.set_symbolic_ref('HEAD', new_ref) if self.is_working: if empty_index: self.rm_all() self.clean_working() return new_ref def remove_branch(self, branch_name): ref = self._format_ref_branch(branch_name) return self.remove_ref(ref) def switch_branch(self, branch_name, tracking=None, create=None): """Changes the current branch """ if create is None: create = True # Check if branch exists if not branch_name in self.branches: self.create_branch(branch_name, branch_name, tracking=tracking) # Get branch reference branch_ref = self._format_ref_branch(branch_name) # Change main branch self.repo.refs.set_symbolic_ref('HEAD', branch_ref) if self.is_working: # Remove all files self.clean_working() # Add files for the current branch self.checkout_all() def create_tag(self, tag_name, target): ref = self._format_ref_tag(tag_name) return self.add_ref(ref, self._parse_reference(target)) def remove_tag(self, tag_name): ref = self._format_ref_tag(tag_name) return self.remove_ref(ref) def clean(self, force=None, directories=None): untracked_files = self.untracked_files map(os.remove, untracked_files) return untracked_files def clean_working(self): """Purges all the working (removes everything except .git) used by checkout_all to get clean branch switching """ return self.clean() def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None): tree = self[tree_sha] structure = {} if depth is None: depth = self.MAX_TREE_DEPTH elif depth == 0: return structure for entry in tree.items(): # tree if entry.mode == self.MODE_DIRECTORY: # Recur 
structure[entry.path] = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha) # commit else: structure[entry.path] = entry.sha structure['.'] = tree_sha structure['..'] = parent_sha or tree_sha return structure def _get_fs_structure_by_path(self, tree_sha, path): parts = path.split(os.path.sep) depth = len(parts) + 1 structure = self._get_fs_structure(tree_sha, depth=depth) return funky.subkey(structure, parts) def commit_ls(self, ref, subpath=None): """List a "directory" for a given commit using the tree of that commit """ tree_sha = self._commit_tree(ref) # Root path if subpath in self.ROOT_PATHS or not subpath: return self._get_fs_structure(tree_sha, depth=1) # Any other path return self._get_fs_structure_by_path(tree_sha, subpath) def commit_file(self, ref, path): """Return info on a given file for a given commit """ name, info = self.get_commit_files(ref, paths=[path]).items()[0] return info def commit_tree(self, ref, *args, **kwargs): tree_sha = self._commit_tree(ref) return self._get_fs_structure(tree_sha, *args, **kwargs) def update_server_info(self): if not self.is_bare: return update_server_info(self.repo) def _is_fast_forward(self): pass def _merge_fast_forward(self): pass def __hash__(self): """This is required otherwise the memoize function will just mess it up """ return hash(self.path) def __getitem__(self, key): try: sha = self._parse_reference(key) except: raise KeyError(key) return self.repo[sha] def __setitem__(self, key, value): try: key = self.dwim_reference(key) except: pass self.repo[key] = value def __contains__(self, key): try: key = self.dwim_reference(key) except: pass return key in self.repo def __delitem__(self, key): try: key = self.dwim_reference(key) except: raise KeyError(key) self.remove_ref(key) # Alias to clone_bare fork = clone_bare log = commit_info diff_count = changes_count contributors = recent_contributors
class Game(object):
    """A versioned game whose board states are committed to a git repository.

    Each save stores the serialized board as ``board.json`` in a new commit
    of the repository located at ``self.data``.
    """

    def __init__(self, name=None, **options):
        """Open the game repository, creating it when allowed.

        :param name: game name; a random hex UUID is used when omitted.
        :param options: overrides merged on top of ``DEFAULTS``. The ``data``
            option is a path template formatted with ``name``; the ``create``
            option controls whether a missing game may be created.
        :raises GameError: when the game does not exist and ``create`` is off.
        """
        self.name = name or uuid.uuid4().hex
        self.options = dict(DEFAULTS, **options)
        self.data = self.options.pop('data').format(name=self.name)
        new = False
        self.repo = None
        if not os.path.exists(self.data):
            if not self.options['create']:
                raise GameError("Game does not exist")
            os.makedirs(self.data)
        try:
            self.repo = Repo(self.data)
        except dulwich.errors.NotGitRepository:
            if not self.options['create']:
                raise GameError("Game does not exist")
            self.repo = Repo.init_bare(self.data)
            new = True
        # A real conditional: the old ``new and X or Y`` form silently fell
        # through to get_board() whenever the fresh board evaluated falsy.
        self.board = BoardState() if new else self.get_board()
        if new:
            self.save("New blank board for game: %s" % self.name)

    @property
    def branch(self):
        """Name of the branch HEAD points at, or 'master' when not symbolic."""
        head = self.repo.refs.read_ref('HEAD')
        if head and head.startswith('ref: '):
            head = head.split(': ')[-1]
            head = head.replace('refs/heads/', '')
            return head
        return 'master'

    def _tree(self, branch=None):
        """Return the tree of the branch tip, or an empty Tree if it is missing."""
        branch = branch or self.branch
        try:
            return self.repo[
                self.repo['refs/heads/%s' % branch].tree
            ]
        except KeyError:
            return Tree()

    def signature(self, of=None):
        """Return the SHA for branch ``of`` (HEAD when omitted), or None."""
        ref = "refs/heads/%s" % of if of else "HEAD"
        try:
            return self.repo.refs[ref]
        except KeyError:
            return None

    def get_board(self, branch=None):
        """Load the board stored as ``board.json`` on the given branch.

        :raises GameError: when the branch is unknown.
        :raises IndexError: when the branch tree holds no ``board.json``.
        """
        branch = branch or self.branch
        if branch not in self.branches():
            raise GameError("Unknown branch")
        # Entries are indexed as (mode, name, sha).
        board_shas = [entry[2] for entry in self._tree(branch).entries()
                      if entry[1] == 'board.json']
        return BoardState.from_json(self.repo[board_shas.pop()].data)

    def set_branch(self, new):
        """Point HEAD at branch ``new``; return its name, or False if unknown."""
        if 'refs/heads/%s' % new in self.repo.get_refs():
            self.repo.refs.set_symbolic_ref('HEAD', 'refs/heads/%s' % new)
            return self.branch
        return False

    def branches(self):
        """Return the sorted names of all local branches."""
        prefix = 'refs/heads/'
        # Only refs under refs/heads/ are branches; previously every ref
        # except HEAD (tags, remotes, ...) was reported as a branch name.
        return sorted(name[len(prefix):]
                      for name in self.repo.get_refs()
                      if name.startswith(prefix))

    def make_branch(self, name, back=0):
        """Create branch ``name`` at the commit ``back`` steps into history.

        :raises GameError: when the branch exists or ``back`` is out of range.
        """
        if ('refs/heads/%s' % name) in self.repo.get_refs():
            raise GameError("I already have this branch")
        try:
            head = self.repo.head()
            history = self.repo.revision_history(head)
            self.repo.refs['refs/heads/%s' % name] = history[back].id
        except IndexError:
            raise GameError("Trying to go {back} which is further than history".format(back=back))
        return True

    def save(self, message="Forced commit"):
        """Commit the current board as ``board.json`` with ``message``."""
        blob = Blob.from_string(self.board.as_json())
        tree = self._tree()
        # 0o100644 (regular file mode) is valid on Python 2.6+ and 3; the
        # legacy literal 0100644 is a SyntaxError on Python 3.
        # NOTE(review): argument order (mode, name, sha) is kept from the
        # original; confirm it matches the installed dulwich Tree.add.
        tree.add(0o100644, 'board.json', blob.id)
        for obj in (blob, tree):
            self.repo.object_store.add_object(obj)
        self.repo.do_commit(message, committer="Game %s" % self.name, tree=tree.id)

    def move(self, x, y):
        """Play at (x, y); return the player who moved, or None if rejected."""
        player = self.board.player_turn()
        if not self.board.game_over and self.board.move(x, y):
            self.save("{player} moved to ({x}, {y})".format(player=player, x=x, y=y))
            return player
        return None

    def skip(self):
        """Skip the current turn; return game_over if truthy, else the next player."""
        player = self.board.player_turn()
        if not self.board.game_over:
            self.board.move(None)
            is_or_isnt = "is" if self.board.game_over else "is NOT"
            self.save("{player} skipped, game {maybe} over".format(player=player, maybe=is_or_isnt))
        return self.board.game_over or self.board.player_turn()

    def who(self):
        """Return the player whose turn it is."""
        return self.board.player_turn()

    def scores(self):
        """Return the board's current scores."""
        return self.board.scores()

    def winner(self):
        """Return the board's winner attribute."""
        return self.board.winner

    def __unicode__(self):
        return "Game: {name} {black} vs {white} on {x}x{y} from {data} :: {board}".format(
            name=self.name,
            board=self.board,
            data=self.data,
            **self.options
        )

    __str__ = __unicode__

    def __repr__(self):
        return "<%s>" % self
class GitPapersApp(object):
    """Paper database stored in a git repository.

    Papers are listed in a ``.index`` file at the repository root, one
    ``key:timestamp:path:ext:reftype`` record per line.
    """

    def __init__(self, path='.'):
        """Open the repository at ``path``.

        :raises RepoNotInitialised: when ``path`` is not a git repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            raise RepoNotInitialised()
        self._db = None
        self._root = None

    def commit(self, paths, commit_message):
        """Stage the given paths and commit."""
        # Paths must be relative to the repo. Matching on "<repo>/" avoids
        # the IndexError the character probe raised when path == repo path.
        prefix = self.repo.path + '/'

        def clean_path(path):
            if path.startswith(prefix):
                return path[len(prefix):]
            return path

        # A real list (not a lazy map object) so staging sees a sequence.
        paths = [clean_path(path) for path in paths]
        self.repo.stage(paths)
        self.repo.do_commit(
            message=str(commit_message),
            committer=COMMIT_AUTHOR,
        )

    def history(self):
        """Generate individual papers, starting from most recent."""
        # Generate from index not commit history
        for key in self.keymap:
            yield Paper.load(self.repo.path, self.keymap[key])

    #
    # Key related functions/properties
    #
    @property
    def keymap(self):
        """Ordered mapping of key -> MetaData, newest timestamp first.

        Loaded lazily from ``.index`` and cached on the instance.

        :raises InvalidIndex: on a duplicate key or a malformed line.
        """
        if not hasattr(self, '_keymap'):
            MetaData = namedtuple('MetaData', 'timestamp, path, ext, reftype')
            kmap = {}
            with open(osp.join(self.repo.path, '.index')) as f:
                for line in f:
                    # Lines keep their newline, so test the stripped text to
                    # actually skip blank lines.
                    if not line.strip():
                        continue
                    key, ts, path, ext, reftype = self._parse_idx(line)
                    if key in kmap:
                        raise InvalidIndex('Duplicate key: %s' % key)
                    # Compare by value: ``is not`` on a freshly split string
                    # was practically always true, so 'nofile' never became None.
                    kmap[key] = MetaData(ts, path,
                                         ext if ext != 'nofile' else None,
                                         reftype)
            self._keymap = OrderedDict(
                sorted(kmap.items(), key=lambda x: -float(x[1].timestamp)))
        return self._keymap

    def _parse_idx(self, item):
        """Create tuple from single line in the index."""
        if item.endswith('\n'):
            item = item[:-1]
        try:
            key, timestamp, path, ext, reftype = item.split(':')
        except ValueError:
            raise InvalidIndex('Malformed line: %s' % item)
        else:
            return (key, timestamp, path, ext, reftype)

    def __contains__(self, key):
        """Check whether a particular key exists in the database."""
        return key in self.keymap

    def __len__(self):
        """The number of papers in the database."""
        return len(self.keymap)

    def __getitem__(self, key):
        """Retrieve the paper with the given key."""
        key = self._matchkey(key)
        metadata = self.keymap[key]
        return Paper.load(self.repo.path, metadata)

    def __iter__(self):
        for key in self.keymap:
            yield self[key]

    def _matchkey(self, key):
        """Matches the key against an entry in the index.

        Partial keys and directory names are permitted.

        :raises MultipleKeyMatches: when a partial key matches several entries.
        :raises NonexistentKey: when nothing matches.
        """
        if key.startswith('.'):
            pass  # TODO: Could be directory
            #raise IllegalKey(key)
        if key not in self:
            # Try splitting the key to just base
            if '/' in key:
                if key.endswith('/'):
                    key = key[:-1]
                key, _ = osp.splitext(osp.basename(key))
            # Try matching partial keys
            if key not in self:
                matches = [k for k in self.keymap if k.startswith(key)]
                if len(matches) > 1:
                    raise MultipleKeyMatches(key, matches)
                if not matches:
                    raise NonexistentKey(key)
                key = matches[0]
        return key

    #
    # Commands
    #
    @classmethod
    def init(cls, path='.'):
        """Initialise a new git-papers repo at the given path and commit the
        basic directory structure.

        :raises RepoInitialised: when ``path`` already holds a repository.
        :raises FileCreationFailed: when a bookkeeping file cannot be created.
        """
        # TODO: do we need seperate .db directory?
        try:
            Repo(path)
        except NotGitRepository:
            pass
        else:
            raise RepoInitialised()
        Repo.init(path)
        app = cls(path)
        emptyfiles = ['.index', '.tags', '.toread']
        # Loop variable no longer shadows ``path``: the files are created
        # inside the repo (not the cwd) and cleanup removes <path>/.git.
        for name in emptyfiles:
            try:
                with open(osp.join(path, name), 'w'):
                    pass
            except (IOError, OSError) as e:
                from shutil import rmtree
                # TODO: remove created emptyfiles
                rmtree(osp.join(path, '.git'))
                # str(e) works on Python 2 and 3; e.message is Python 2 only.
                raise FileCreationFailed(str(e))
        app.commit(emptyfiles, INIT)
        return app

    def _add_to_index(self, paper):
        """Add the paper to the end of the index."""
        with open(osp.join(self.repo.path, '.index'), 'a') as f:
            # Save path relative to the repo
            path = paper.path
            if self.repo.path != '.' and path.startswith(self.repo.path):
                path = path[len(self.repo.path)+1:]
            f.write('{key}:{ts}:{path}:{ext}:{reftype}\n'.format(
                key=paper.ref.key,
                ts=paper.timestamp,
                path=path,
                ext=paper.ext if paper.ext else 'nofile',
                reftype=type(paper.ref).__name__,
            ))
        # Drop the cached keymap so the next lookup re-reads the index and
        # sees the new entry (e.g. for duplicate-key detection).
        self.__dict__.pop('_keymap', None)

    def add(self, paperfile, reffunc, reftype=BibRef):
        """Add a new paper to the database, fetching the reference with the
        given reffunc.

        :raises DuplicateKey: when the reference key is already indexed.
        """
        # NOTE(review): ``reftype`` is accepted but BibRef is always used
        # for deserialization; confirm whether that is intended.
        ref = BibRef.deserialize(reffunc())
        if ref.key in self:
            raise DuplicateKey(ref.key)
        path = osp.join(self.repo.path, ref.type, ref.key)
        paper = Paper.new(path, ref, paperfile)
        self._add_to_index(paper)
        idx_path = osp.join(self.repo.path, '.index')
        if not paperfile:
            self.commit(
                [idx_path, paper.ref_path],
                commit_message(ADD, paper.key)
            )
        else:
            self.commit(
                [idx_path, paper.filepath, paper.ref_path],
                commit_message(ADD, paper.key)
            )
        return paper

    def rm(self, key):
        """Remove the paper from the repo."""
        pass  # TODO: stub

    def tag(self, key, suggest=True):
        # NOTE(review): only reads the known tags and the paper's current
        # tags; nothing is returned or written here -- looks unfinished.
        paper = self[key]
        with open(osp.join(self.repo.path, '.tags'), 'r') as f:
            all_tags = TagSet(f.read())
        current_tags = paper.tags
# Scratch script: write a test file, stage it, commit it with dulwich, then
# idle for a day. Relies on a ``repo`` object created earlier in the file.
repo
index = repo.open_index()
print(index.path.decode(sys.getfilesystemencoding()))
list(index)
# Context manager guarantees the handle is closed even if the write fails.
with open('unscraper/thisIsATest.md', 'wb') as f:
    _ = f.write(b"monty")
# NOTE(review): the staged path b"thisIsATest" does not match the file name
# written above ('unscraper/thisIsATest.md') -- confirm the intended path
# relative to the repository root.
repo.stage([b"thisIsATest"])
print(",".join([f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))
commit_id = repo.do_commit(
    b"testing dulwich", committer=b"Aly <*****@*****.**>")
# REPL-style sanity check; the comparison result is discarded.
repo.head() == commit_id
print ('**************************************************')
print ('******************batch complete******************')
print ('**************************************************')
time.sleep(86400) #24hr time delay
#end of program
def move(path):
    """Rename a media file to the name given in the ``Destination`` header.

    The source name (after ``secure_filename``) must contain exactly two
    dots and carry an allowed double extension; the destination name must
    contain exactly one dot and an allowed extension. After a successful
    rename the new file is staged and committed to the git repository in
    the media root. The git step is best effort: failures are only logged.

    Every validation failure is reported through ``end(status, message)``.
    NOTE(review): this function relies on ``end`` aborting the request (no
    check is followed by a ``return``) -- confirm against its definition.

    :param path: requested source file name.
    :returns: ``('', 204)`` once the file has been moved.
    """
    debug("MOVE with path '%s'" % (path))
    try:
        filename = secure_filename(path)
    except Exception as e:
        debug("secure_filename failed: %s:%s" % (path, str(e)))
        # Previously execution continued and crashed on the unbound name.
        end(403, "file name not allowed!")
    debug("ok: secure_filename succeed %s" % filename)

    # The source name must contain exactly 2 dots (double extension).
    dotCount = filename.count('.')
    if dotCount != 2:
        error("file do not contains 2 dot.")
        end(403, "file not contains 2 dot!")
    debug("ok: file contains just 2 dot.")

    # Build the lower-cased double extension, e.g. ".a" + ".b".
    root, ext = os.path.splitext(filename)
    first_ext = os.path.splitext(root)[1].lower()
    extension = (first_ext + ext).lower()
    if extension not in allowed_extention:
        error("file extension NOT allowed '%s'." % extension)
        debug("error: allowed %s." % (pp.pformat(allowed_extention)))
        end(403, "file extension not allowed!")
    debug("ok: file extension allowed.")

    basepath = os.path.join(config['MediaRoot'])
    filepath = os.path.join(basepath, filename)
    if not os.path.isdir(basepath):
        debug("error: Folder do not exist %s" % str(basepath))
        end(403, "oups, Folder do not exist '%s'." % (str(basepath)))
    if not os.path.isfile(filepath):
        # This check is about the source file, not a folder.
        debug("error: File do not exist %s" % str(filepath))
        end(403, "oups, File do not exist '%s'." % (str(filepath)))

    dest = request.headers.get('Destination')
    if not dest:
        # Without this guard re.sub() raised TypeError on None.
        debug("error: Destination header missing")
        end(400, "Destination header missing")
    # Strip the scheme, host and root URL prefix. Both parts are escaped so
    # that characters such as '.' in the host name are matched literally.
    prefix = r'^https?://' + re.escape(realhost) + '/' + re.escape(root_url)
    dest = re.sub(prefix, '', dest)
    dest = urllib.parse.unquote(dest, encoding='utf-8')
    try:
        destfilename = secure_filename(dest)
    except Exception as e:
        debug("secure_filename failed: %s:%s" % (dest, str(e)))
        end(403, "destfile name not allowed!")
    debug("ok: secure_filename succeed %s" % destfilename)

    # The destination name must contain exactly 1 dot.
    dotCount = destfilename.count('.')
    if dotCount != 1:
        error("destfile do not contains 1 dot.")
        end(403, "destfile not contains 1 dot!")
    debug("ok: destfile contains just 1 dot.")

    extension = os.path.splitext(destfilename)[1].lower()
    if extension not in allowed_extention:
        error("desfile extension NOT allowed '%s'." % extension)
        debug("error: allowed %s." % (pp.pformat(allowed_extention)))
        end(403, "destfile extension not allowed!")

    destfilepath = os.path.join(basepath, destfilename)
    if os.path.isfile(destfilepath):
        debug("File '%s' exist on system." % (destfilepath))
        end(404, "File exist")

    try:
        os.rename(filepath, destfilepath)
    except FileExistsError:
        debug("Could not move file from '%s' to '%s' on system." % (filepath, destfilepath))
        end(403, "Could not move file.")
    except Exception as e:
        debug("error: %s" % str(e))
        debug("Could not move file from '%s' to '%s' on system." % (filepath, destfilepath))
        end(403, "Could not move file.")

    # The interesting stuff now: record the move in git.
    gitbasepath = os.path.join(basepath, '.git')
    if not os.path.isdir(gitbasepath):
        debug("error: Folder do not exist %s" % str(gitbasepath))
        try:
            Repo.init(basepath, mkdir=False)
        except Exception as e:
            debug("Git repo creation failed:%s:%s" % (basepath, str(e)))

    # Derive "First Last <first@last.tld>" from the Basic auth user name.
    # The committer is only set when parsing fully succeeds, so a
    # half-parsed header is never used as committer.
    real_committer = None
    auth_header = request.headers.get('Authorization')
    if auth_header:
        try:
            # "Authorization: Basic BASE64"
            login = base64.b64decode(auth_header.split(' ')[1]).decode('utf-8').split(':', 1)[0]
            login = urllib.parse.unquote(login, encoding='utf-8')
            real_firstname, domain = login.split('@', 1)
            real_name = domain.split('.', 1)[0]
            real_committer = real_firstname.title() + ' ' + real_name.title() + ' <' + login + '>'
            debug("ok: real_committer:%s" % (real_committer))
        except Exception as e:
            debug("Git commiter fetch: failed:%s" % (str(e)))
    if not real_committer:
        # config is subscripted above (config['MediaRoot']), not called.
        real_committer = config['Committer']
        debug("ok: fake_committer:%s" % (real_committer))

    try:
        # Opening the repo is part of the best-effort block: the file has
        # already been moved, so a git problem must not fail the request.
        git_repository = Repo(basepath)
        # NOTE(review): only the new name is staged; the removal of the old
        # name is not recorded -- confirm whether that is intended.
        git_repository.stage([destfilename])
        git_repository.do_commit(basepath, committer=real_committer)
    except Exception as e:
        debug("Git repo commit failed:%s" % (str(e)))
    return '', 204
class Store(object):
    """
    A simple key/value store using git as the backing store.

    Keys are slash-separated paths inside the repository tree. Values are
    serialized with ``serializer`` (the ``json`` module by default) and
    stored as git blobs; every mutation creates a commit on a branch.

    NOTE(review): the implementation relies on Python 2 idioms
    (``dict.iteritems``, ``basestring``, ``str(obj.data)``) -- confirm the
    target interpreter before porting.
    """

    def __init__(self, repo_path, serializer=None):
        """
        Open the git repository backing the store.

        :param repo_path: Path of an existing git repository.
        :param serializer: Object providing ``dumps``/``loads``. Defaults to
                           the ``json`` module.
        :raises ValueError: If ``repo_path`` does not exist.
        """
        if not os.path.exists(repo_path):
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        self.repo = Repo(repo_path)
        self.serializer = serializer if serializer else json

    def create_branch(self, branch, parent=None):
        """
        Create a branch ref if it does not exist yet.

        :param branch: Branch name (with or without ``refs/heads/``).
        :param parent: Commit sha the new branch should point at. Defaults
                       to the head of ``master``.
        :return: ``{'sha': <head of branch>}``
        """
        if not parent:
            parent = self.branch_head('master')
        self.repo.refs.add_if_new(self._branch_ref_name(branch), parent)
        return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None,
              committer=None):
        """
        Apply the tree differences of ``source_branch`` onto
        ``target_branch`` and commit the result on ``target_branch`` with
        the source head as an extra parent.

        Added and modified paths are copied over, deleted paths are removed;
        copy, rename and unchanged entries are ignored.

        :param source_branch: Branch to merge from.
        :param target_branch: Branch to merge into.
        :return: ``{'sha': <new commit sha>}``
        :raises ValueError: If both branch names are equal.
        """
        if source_branch == target_branch:
            raise ValueError("Cannot merge branch with itself %s"
                             % source_branch)
        target_tree = self._get_object(ROOT_PATH, target_branch)
        branch_tree = self._get_object(ROOT_PATH, source_branch)
        changes = diff_tree.tree_changes(self.repo.object_store,
                                         target_tree.id, branch_tree.id)
        for tc in changes:
            if tc.type in (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY):
                # _add_tree mutates target_tree in place.
                self._add_tree(target_tree,
                               ((tc.new.path, tc.new.sha, tc.new.mode), ))
            elif tc.type == diff_tree.CHANGE_DELETE:
                # NOTE(review): _delete reloads the trees from
                # target_branch, so additions applied to the previous
                # target_tree object earlier in this loop appear to be
                # discarded -- confirm.
                target_tree = self._delete(tc.old.path, target_branch)
            # CHANGE_COPY, CHANGE_RENAME and CHANGE_UNCHANGED need no action.
        msg = "Merge %s to %s" % (source_branch, target_branch)
        merge_heads = [self.branch_head(source_branch)]
        sha = self.repo.do_commit(tree=target_tree.id,
                                  message=msg,
                                  ref=self._branch_ref_name(target_branch),
                                  merge_heads=merge_heads,
                                  author=author,
                                  committer=committer)
        return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Get a tree or blob from the store by key.

        The key param can be paths such as 'a/b/c'. If the key requested
        represents a Tree in the git db, then a document will be returned in
        the form of a python dict. If the key requested represents a Blob in
        the git db, then the deserialized blob content will be returned.

        :param key: The key to retrieve from the store
        :param shallow: When true, limit traversal depth to the number of
                        path components in ``key``
        :param branch: The branch name to search for the requested key
        :param commit_sha: Optional commit to read from instead of the
                           branch head
        :return: Either a python dict or the deserialized value depending on
                 whether the requested key points to a git Tree or Blob, or
                 None when the key is not found
        """
        obj = self._get_object(key, branch, commit_sha)
        # Truthiness kept on purpose: a missing key and (presumably) an
        # empty Tree both yield None here.
        if obj:
            if isinstance(obj, Blob):
                return self.serializer.loads(obj.data)
            elif isinstance(obj, Tree):
                keys = key.split('/')
                depth = len(keys) if shallow else None
                # Build the dict from the same commit the object was
                # resolved at, not unconditionally from the branch head.
                tree = self.trees(key, depth=depth, branch=branch,
                                  commit_sha=commit_sha)
                if keys != [ROOT_PATH]:
                    for k in keys:
                        tree = tree[k]
                return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None):
        """
        Resolve ``key`` to a git object.

        :param commit_sha: Commit to look in. Defaults to the head of
                           ``branch``.
        :return: The git object, or None when a lookup raises KeyError.
        """
        try:
            if not commit_sha:
                commit_sha = self.branch_head(branch)
            (mode, sha) = tree_lookup_path(self.repo.get_object,
                                           self._repo_tree(commit_sha), key)
            return self.repo[sha]
        except KeyError:
            return None

    def put(self, key, value, flatten_keys=True, branch='master',
            author=None, committer=None):
        """
        Add/Update one or more key value pairs in the store and commit.

        When ``branch`` has no root tree yet, the root tree of ``master`` is
        used as the starting point and the head of ``master`` becomes a
        merge head of the new commit.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: When true, ``{key: value}`` is passed through
                             ``flatten`` first (presumably expanding nested
                             dicts into path keys -- confirm)
        :param branch: Branch to commit to
        :return: ``{'sha': <new commit sha>}``
        """
        e = {key: value}
        if flatten_keys:
            e = flatten(e)
        root_tree = self._get_object(ROOT_PATH, branch)
        merge_heads = []
        # Compare with None explicitly: _get_object returns None for a
        # missing path, whereas an existing but empty Tree has length 0 and
        # would also test false.
        if root_tree is None:
            root_tree = self._get_object(ROOT_PATH)
            merge_heads = [self.branch_head('master')]
        blobs = []
        lines = []
        for (path, item) in e.iteritems():
            blob = Blob.from_string(self.serializer.dumps(item))
            self.repo.object_store.add_object(blob)
            blobs.append((path, blob.id, stat.S_IFREG))
            lines.append("Put %s\n" % path)
        msg = ''.join(lines)
        root_id = self._add_tree(root_tree, blobs)
        sha = self.repo.do_commit(tree=root_id,
                                  message=msg,
                                  ref=self._branch_ref_name(branch),
                                  merge_heads=merge_heads,
                                  author=author,
                                  committer=committer)
        return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Delete one or more entries from the store and commit.

        The key param can refer to either a Tree or Blob in the store. If it
        refers to a Blob, then just that entry will be removed. If it refers
        to a Tree, then that entire subtree will be removed. When the key is
        not found on ``branch`` the deletion is computed against ``master``
        and the head of ``master`` becomes a merge head.

        :param key: The key to remove from the store.
        :param branch: Branch to commit to
        :return: ``{'sha': <new commit sha>}``
        """
        tree = self._get_object(key, branch)
        merge_heads = []
        delete_branch = branch
        # Only a genuinely missing key falls back to master; an existing
        # but empty Tree must not.
        if tree is None:
            merge_heads = [self.branch_head('master')]
            delete_branch = 'master'
        root = self._delete(key, delete_branch)
        sha = self.repo.do_commit(tree=root.id,
                                  message="Delete %s" % key,
                                  ref=self._branch_ref_name(branch),
                                  merge_heads=merge_heads,
                                  author=author,
                                  committer=committer)
        return {'sha': sha}

    def _delete(self, key, branch='master'):
        """
        Remove ``key`` from the tree of ``branch`` without committing.

        The rewritten trees are added to the object store.

        :return: The rewritten root Tree object.
        """
        # Load every ancestor tree of key, keyed by its path.
        trees = {}
        path = key
        if path:
            while path:
                (path, name) = pathsplit(path)
                trees[path] = self._get_object(path, branch)
        else:
            trees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)

        (path, name) = pathsplit(key)
        if name:
            del trees[path][name]
        else:
            # No leaf name: empty the addressed tree instead.
            for entry in trees[path].iteritems():
                del trees[path][entry.path]

        # Walk back up, re-linking each modified tree into its parent.
        while path:
            (parent_path, name) = pathsplit(path)
            trees[parent_path].add(name, stat.S_IFDIR, trees[path].id)
            self.repo.object_store.add_object(trees[path])
            path = parent_path
        self.repo.object_store.add_object(trees[ROOT_PATH])
        return trees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the root tree sha of the given commit."""
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, depth=None, filter_by=None,
             branch='master', commit_sha=None):
        """
        Returns a list of keys from the store.

        The path param can be used to scope the request to return keys from
        a subset of the tree. The filter_by param can be used to control
        whether to return keys for Blob nodes, Tree nodes or all nodes.
        Default is to return all node keys from the root of the store.

        :param path: The starting point retrieve key paths from. Default is
                     ROOT_PATH which starts from the root of the store.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: Specifies how deep to recurse. Default is None which
                      does full tree traversal.
        :param filter_by: Either 'blob', 'tree' or None. Controls what type
                          of node key paths to return. Default is None which
                          returns all node type key paths
        :param branch: The branch name to return key paths for.
        :param commit_sha: Optional commit to read from instead of the
                           branch head
        :return: A list of keys in the order produced by raw_entries.
        """
        if filter_by == 'blob':
            wanted = Blob
        elif filter_by == 'tree':
            wanted = Tree
        else:
            wanted = None
        found = self.raw_entries(path, pattern, depth, branch, commit_sha)
        return [key for key, obj in found
                if wanted is None or isinstance(obj, wanted)]

    def entries(self, path=ROOT_PATH, pattern=None, depth=None,
                branch='master', commit_sha=None):
        """
        Generate ``(key, deserialized_value)`` for every Blob yielded by
        raw_entries with the same arguments; Tree entries are skipped.
        """
        found = self.raw_entries(path, pattern, depth, branch, commit_sha)
        for key, obj in found:
            if isinstance(obj, Blob):
                yield (key, self.serializer.loads(str(obj.data)))

    def raw_entries(self, path=ROOT_PATH, pattern=None, depth=None,
                    branch='master', commit_sha=None):
        """
        Returns a generator that traverses the tree and produces entries of
        the form (tree_path, git_object), where tree_path is a string
        representing a key into the store and git_object is either a git
        Blob or Tree object.

        :param path: String key to begin producing result entries from.
                     Defaults to ROOT_PATH which starts from the root of the
                     store.
        :param pattern: Regex pattern to filter matching tree paths.
                        Defaults to MATCH_ALL.
        :param depth: Specifies how deep to recurse when producing results.
                      Default is None which does full tree traversal.
        :param branch: Git branch name to return key paths for. Defaults to
                       'master'.
        :param commit_sha: Optional commit to read from instead of the
                           branch head
        :return: A generator that produces entries of the form
                 (tree_path, git_object)
        :raises ValueError: If ``path`` does not resolve to a Tree.
        """
        tree = self._get_object(path, branch, commit_sha)
        if not isinstance(tree, Tree):
            raise ValueError("Path %s is not a tree!" % path)
        if not pattern:
            pattern = MATCH_ALL
        return self._entries(path, tree, pattern, depth)

    def _entries(self, path, tree, pattern, depth=None):
        """
        Recursively yield ``(key, git_object)`` for the entries of ``tree``
        whose key matches ``pattern``.

        A falsy ``depth`` means unlimited recursion; otherwise sub-trees are
        entered only while ``depth`` is greater than 1.
        """
        for tree_entry in tree.iteritems():
            obj = self.repo[tree_entry.sha]
            key = self._tree_entry_key(path, tree_entry)
            if pattern.match(key):
                yield (key, obj)
            if isinstance(obj, Tree):
                if not depth:
                    child_depth = depth
                elif depth > 1:
                    child_depth = depth - 1
                else:
                    continue
                for te in self._entries(key, obj, pattern, child_depth):
                    yield te

    def trees(self, path=ROOT_PATH, pattern=None, depth=None,
              object_depth=None, branch='master', commit_sha=None):
        """
        Returns a python dict representation of the store.

        The resulting dict can be scoped to a particular subtree in the
        store with the path param. The branch param is used to specify the
        git branch name to build the dict from.

        :param path: Option string key to begin building the dict from.
                     Defaults to ROOT_PATH which starts from the root of the
                     store.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: Specifies how deep to recurse when producing results.
                      Default is None which does full tree traversal.
        :param object_depth: Passed through to ``expand_tree``.
        :param branch: Optional git branch name to return key paths from.
                       Defaults to 'master'.
        :param commit_sha: Optional commit to read from instead of the
                           branch head
        :return: A dict represents a section of the store.
        """
        tree = {}
        found = self.entries(path, pattern, depth, branch, commit_sha)
        for entry_path, value in found:
            expand_tree(entry_path, value, tree, object_depth)
        return tree

    def _tree_entry_key(self, path, tree_entry):
        """Return the store key of ``tree_entry`` located under ``path``."""
        if path:
            return "%s/%s" % (path, tree_entry.path)
        return tree_entry.path

    def _branch_ref_name(self, name):
        """Return the full ``refs/heads/...`` ref name for a branch."""
        if name.startswith('refs/heads/'):
            return name
        return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the sha the branch ref points at (KeyError if unknown)."""
        return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs):
        """Commit a new tree.

        :param root_tree: Root tree to add trees to (modified in place)
        :param blobs: Iterable over blob path, sha, mode entries
        :return: SHA1 of the created tree.
        """
        # Nested dicts mirroring the directory layout of the new entries;
        # leaves are (mode, sha) tuples.
        trees = {"": {}}

        def add_tree(path):
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            if path:
                # NOTE(review): the existing subtree is looked up (and a
                # conflicting blob deleted) on the default branch 'master',
                # regardless of which branch root_tree came from -- confirm.
                tree = self._get_object(path)
                if tree is None:
                    tree = Tree()
                if not isinstance(tree, Tree):
                    # A blob occupies the path: remove it and start fresh.
                    self.delete(path)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if isinstance(entry, dict):
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return build_tree("")