예제 #1
0
def lambda_handler(event, context):
    """Commit an uploaded S3 zip archive to a git repo and push it to CodeCommit.

    Downloads the object named by the first record of the S3 event, extracts
    it into a freshly initialised repository under ``/tmp/css_download``,
    commits the top-level entries and pushes ``master`` to the remote given by
    the ``CODECOMMIT_REMOTE_URL`` environment variable.

    :param event: S3 event notification; only the first record is processed
    :param context: Lambda context object (unused)
    :raises ValueError: if the event carries no records
    :raises KeyError: if ``CODECOMMIT_REMOTE_URL`` is not configured
    """
    import shutil
    from urllib.parse import unquote_plus

    archive_path = '/tmp/aws.zip'
    work_dir = '/tmp/css_download'

    # The remote URL embeds git credentials, so it is read from the function's
    # configuration instead of being stored in the source code.
    remote_url = os.environ['CODECOMMIT_REMOTE_URL']

    # Resolve the uploaded object up front so an empty event fails with a
    # clear error instead of an unbound-variable error further down.
    first_record = next(iter(event['Records']), None)
    if first_record is None:
        raise ValueError('S3 event contains no records')
    bucket = first_record['s3']['bucket']['name']
    # Object keys arrive URL-encoded in S3 event notifications.
    key = unquote_plus(first_record['s3']['object']['key'])
    print(key)

    print('local repo creation started')
    # /tmp can survive between invocations of a warm container; start clean so
    # Repo.init(mkdir=True) does not fail on an existing directory.
    shutil.rmtree(work_dir, ignore_errors=True)
    local_repo = Repo.init(work_dir, mkdir=True)
    print('local repo creation successful')
    s3 = boto3.resource('s3')
    print('local repo creation ended')

    s3.Bucket(bucket).download_file(key, archive_path)
    print(os.listdir('/tmp/'))

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(work_dir)

    entries = os.listdir(work_dir)
    print(entries)

    print('Stage started')
    local_repo.stage(entries)
    print('Commit started')
    local_repo.do_commit(b"new commit",
                         committer=b"sandeep <*****@*****.**>")
    print('Push started')
    porcelain.push(work_dir, remote_url, "master")
    print('Push successful')
예제 #2
0
    def dulwichCommit(self, filePath, fullPath, kind):
        """Stage ``filePath`` in the autogit repository and commit if needed.

        A commit is made when the repository has no HEAD yet, or when the tree
        written from the index differs from the tree of ``HEAD``.
        ``fullPath`` is accepted but not used by this method.

        Returns the result of ``do_commit`` when a commit is made, else None.
        """
        repo = Repo(AUTOGIT_PATH)
        repo.stage(map(str, [filePath]))
        index = repo.open_index()

        try:
            committer = repo._get_user_identity()
        except ValueError:
            # No usable identity: fall back to a fixed committer name.
            committer = "autogit"

        def commit_now():
            return repo.do_commit('%s - autogit commit (via dulwich)' % kind,
                                  committer=committer)

        try:
            repo.head()
        except KeyError:
            # No HEAD yet, so there is nothing to diff against.
            return commit_now()

        staged_tree = index.commit(repo.object_store)
        head_tree = repo['HEAD'].tree
        if list(tree_changes(repo, staged_tree, head_tree)):
            return commit_now()
        return None
예제 #3
0
 def GitSave(self, vguuid, commentStr):
     """
     Download the current config files and check them into the git repository.

     Fetches ``/temp/scst.conf`` and ``/temp/<vguuid>`` through ``self.srv``
     into ``self.iscsiconfdir``, then stages every regular file in that
     directory and commits with ``commentStr`` as the message.

     Failures are logged rather than raised; the method always returns None.
     """
     try:
         #srv = Connection(self.serverDNS,self.userName,self.keyFile)
         self.srv.get('/temp/scst.conf',
                      self.iscsiconfdir + self.serverDNS + '.scst.conf')
         self.srv.get(
             '/temp/' + vguuid,
             self.iscsiconfdir + self.serverDNS + '.' + vguuid + '.lvm')
     except Exception:
         # Exception (not a bare except) so Ctrl-C / SystemExit still
         # propagate instead of being logged as a download error.
         logger.error("During GitSave: %s: PYSFTP download error: %s",
                      commentStr, format_exc())
         return

     try:
         repo = Repo(self.iscsiconfdir)
         filelist = [
             f for f in listdir(self.iscsiconfdir)
             if isfile(join(self.iscsiconfdir, f))
         ]
         repo.stage(filelist)
         repo.do_commit(commentStr)
     except Exception:
         logger.error("During GitSave: %s: Git save error: %s",
                      commentStr, format_exc())
예제 #4
0
class Git():
    """
    Wrapper around the dulwich repository holding the versioned files.
    """
    def __init__(self):
        """Open the repository in the user data dir, creating it if missing."""
        self.repo_path = user_data_dir(appname, appauthor)
        self.files_under_version_controll = ['config.json', 'data.json']
        try:
            self.repo = Repo(self.repo_path)
        except NotGitRepository:
            self._create_repo()

    def _create_repo(self):
        """Create the directory and repository, then record an initial commit."""
        if not os.path.exists(self.repo_path):
            try:
                os.makedirs(self.repo_path)
            except OSError as err:
                # The directory may have appeared since the existence check;
                # only that case is tolerated.
                if err.errno != errno.EEXIST:
                    raise
        Repo.init(self.repo_path)
        self.repo = Repo(self.repo_path)
        self.commit('initial commit')

    def commit(self, message):
        """
        Stage files_under_version_controll and commit them.
        :param message: str; commit message
        """
        tracked = self.files_under_version_controll
        self.repo.stage(tracked)
        encoded_message = str.encode(message)
        self.repo.do_commit(encoded_message, b'nextSongs')

    def get_current_head(self):
        """
        Return whatever ``self.repo.head()`` reports for the current head.
        :return: bytes; sha1 checksum of current head
        """
        current = self.repo.head()
        return current

    def get_commits(self):
        """
        Collect the commits yielded by the repository walker.
        :return: reverse iterator over dulwich.objects.Commit, ordered by
            descending author time
        """
        def authored_at(commit):
            return datetime.datetime.fromtimestamp(commit.author_time)

        found = [entry.commit for entry in self.repo.get_walker()]
        found.sort(key=authored_at)
        return reversed(found)

    def restore(self, commit):
        """
        Hard-reset to the given commit, commit the result and reload config.
        :param commit: dulwich.objects.Commit; commit to reset to
        """
        target = str.encode(commit.sha().hexdigest())
        porcelain.reset(self.repo, 'hard', target)
        self.commit("Restored setting and data.")
        Config.read_config()
예제 #5
0
def do_import(commits, repo_loc, overwrite = True, author_="Règlement général <*****@*****.**>"):
    """Replay ``commits`` into a git repository at ``repo_loc``.

    An existing directory is wiped and re-initialised when ``overwrite`` is
    true, otherwise it is opened as-is; a missing directory is created.
    For every entry of ``commits`` the tree is written with ``create_tree``,
    the added paths are staged, the removed paths are dropped from the index,
    a commit dated ``entry[0]`` is made and a tag named after that date is
    attached to HEAD.

    Each entry is indexed as ``entry[0]`` (date), ``entry[1]`` (items) and,
    when it has exactly three elements, ``entry[2]`` (passed as ``main``).
    """
    def to_fs_bytes(text):
        return text.encode(sys.getfilesystemencoding())

    def paris_offset(moment):
        # "%z" yields +HHMM; multiplying the integer form by 36 gives seconds
        # for whole-hour offsets.
        return int(TZ_PARIS.localize(moment).strftime("%z")) * 36

    already_there = exists(repo_loc)
    if already_there and not overwrite:
        repo = Repo(repo_loc)
    else:
        if already_there:
            print("Deleting existing output directory: %s" % repo_loc)
            shutil.rmtree(repo_loc)
        os.mkdir(repo_loc)
        repo = Repo.init(repo_loc)

    print("Importing %d commit(s)" % len(commits))

    for number, entry in enumerate(commits):
        when = entry[0]
        print("Commit %d dated %s, %d items" % (number, str(when), len(entry[1])))
        print("  authored by %s" % author_)
        added, removed = create_tree(
            entry, repo_loc, readme=False,
            main=entry[2] if len(entry) == 3 else {})
        repo.stage([to_fs_bytes(p) for p in set(added)])

        index = repo.open_index()
        print("  Removing %d files" % len(removed))
        for gone in removed:
            del index[to_fs_bytes(gone)]
        index.write()

        author = bytes(author_, "UTF-8")
        message = bytes("Version du %s" % when.strftime(FMT), "UTF-8")
        repo.do_commit(
            message,
            committer=author,
            commit_timestamp=when.timestamp(),
            commit_timezone=paris_offset(when))

        # Tag the commit that was just created, named after its date.
        tag_name = bytes(when.strftime(ISO_8601), "UTF-8")
        head_obj = parse_object(repo, "HEAD")
        tag = Tag()
        tag.tagger = author
        tag.name = tag_name
        tag.message = b''
        tag.object = (type(head_obj), head_obj.id)
        tag.tag_time = int(time.time())
        tag.tag_timezone = paris_offset(when)
        repo.object_store.add_object(tag)

        repo.refs[b'refs/tags/' + tag_name] = tag.id

    repo.close()
예제 #6
0
 def makeRepo(self, tree_contents):
     """Commit ``tree_contents`` to the repository at ``self.repository_path``.

     :param tree_contents: iterable of ``(filename, contents)`` pairs; each
         becomes a regular file (mode 0644) in the root tree of one commit.
     """
     repo = GitRepo(self.repository_path)
     blobs = [(Blob.from_string(contents), filename)
              for (filename, contents) in tree_contents]
     repo.object_store.add_objects(blobs)
     # 0o644 is the octal spelling accepted by both Python 2.6+ and Python 3;
     # the bare 0644 form is a syntax error on Python 3.
     root_id = dulwich.index.commit_tree(
         repo.object_store,
         [(filename, b.id, stat.S_IFREG | 0o644) for (b, filename) in blobs])
     repo.do_commit(committer='Joe Foo <*****@*****.**>',
                    message=u'<The commit message>',
                    tree=root_id)
예제 #7
0
 def GitSave(self,commentStr):
     """
     Check in changes to config files into git repository.

     Stages every regular file found directly in ``self.iscsiconfdir`` and
     commits with ``commentStr`` as the message.

     Returns 1 on success and -1 when staging or committing failed (the
     traceback is logged).
     """
     try:
         repo = Repo(self.iscsiconfdir)
         filelist = [f for f in listdir(self.iscsiconfdir)
                     if isfile(join(self.iscsiconfdir, f))]
         repo.stage(filelist)
         repo.do_commit(commentStr)
     except Exception:
         # Exception (not a bare except) so Ctrl-C / SystemExit still
         # propagate instead of being reported as a git failure.
         logger.error("During GitSave %s: Git save error: %s",
                      commentStr, format_exc())
         return -1
     return 1
예제 #8
0
 def makeRepo(self, repository_name, tree_contents):
     """Create the named repository and commit ``tree_contents`` to it.

     The repository is created under ``self.repository_store`` through
     ``self.createRepository`` (bare when ``self._use_server`` is set).
     ``tree_contents`` is an iterable of ``(filename, contents)`` pairs, each
     stored as a regular file with mode 0644 in the root tree.
     """
     repository_path = os.path.join(self.repository_store, repository_name)
     os.makedirs(repository_path)
     self.createRepository(repository_path, bare=self._use_server)

     repo = GitRepo(repository_path)
     blobs = []
     for filename, contents in tree_contents:
         blobs.append((Blob.from_string(contents), filename))
     repo.object_store.add_objects(blobs)

     tree_entries = [(filename, blob.id, stat.S_IFREG | 0o644)
                     for blob, filename in blobs]
     root_id = dulwich.index.commit_tree(repo.object_store, tree_entries)
     repo.do_commit(
         committer='Joe Foo <*****@*****.**>',
         message=u'<The commit message>',
         tree=root_id)
예제 #9
0
파일: yamldb.py 프로젝트: ytjohn/yamlstore
def writefile(namespacepath, path, data):
    """ Write data to a file inside a namespace and commit it to git.
    @param namespacepath: directory of the namespace; also opened as the git
        repository
    @param path: path of the file, relative to namespacepath
    @param data: content written to the file
    @return: (ok, message) tuple; ok is False when writing or committing
        failed
    """

    fullpath = "%s/%s" % (namespacepath, path)

    # Write the data to the file. The context manager closes the handle even
    # when the write itself fails.
    try:
        with open(fullpath, 'w') as f:
            f.write(data)
    except Exception:
        return (False, "Could not write file %s" % fullpath)

    # Now add it to git.
    try:
        repo = Repo(namespacepath)
        # stage() takes a list of paths; a bare string is not accepted as a
        # single path by every dulwich version.
        repo.stage([path])
        # Obviously, we'll want to get this commit info from somewhere else.
        commit_id = repo.do_commit(
             "An API commit", committer="API Committer <*****@*****.**>")
    except Exception:
        # The message must use namespacepath: the previously referenced name
        # `namespace` does not exist and raised NameError from this handler.
        return (False, "Could not commit file %s to namespace %s" % (path, namespacepath))

    return (True, "Commited as %s" % commit_id)
예제 #10
0
 def commit(repo: Repo, msg: str) -> str:
     """Re-stage every indexed file whose content changed, then commit.

     Each index entry is compared with a blob built from the file currently
     on disk; only entries whose sha differs are staged again. Returns the
     value of ``repo.do_commit``.
     """
     for tree_path, entry in repo.open_index().items():
         on_disk = os.path.join(repo.path.encode(), tree_path)
         current = blob_from_path_and_stat(on_disk, os.lstat(on_disk))
         if current.id == entry.sha:
             continue
         repo.stage(tree_path)
     committer = b"Source{d} ML Team <*****@*****.**>"
     return repo.do_commit(msg.encode(), committer)
예제 #11
0
    def _dulwich_commit(self, author, message=DEFAULT_COMMIT_MSG):
        """
        Commit the staged files of the repo at ``self.config['top_dir']``.

        Raises SartorisError (exit code 14) when HEAD does not equal the id
        returned by the commit.
        """
        repo = Repo(self.config['top_dir'])
        new_commit = repo.do_commit(message, committer=author)

        if repo.head() == new_commit:
            return
        raise SartorisError(message=exit_codes[14], exit_code=14)
예제 #12
0
 def GitSave(self,vguuid,commentStr):
     """
     Check in changes to config files into git repository.

     Downloads ``/temp/scst.conf`` and ``/temp/<vguuid>`` via ``self.srv``
     into ``self.iscsiconfdir``, stages every regular file in that directory
     and commits with ``commentStr``. Errors are logged, never raised; the
     return value is always None.
     """
     scst_target = self.iscsiconfdir+self.serverDNS+'.scst.conf'
     lvm_target = self.iscsiconfdir+self.serverDNS+'.'+vguuid+'.lvm'
     try:
         #srv = Connection(self.serverDNS,self.userName,self.keyFile)
         self.srv.get('/temp/scst.conf', scst_target)
         self.srv.get('/temp/'+vguuid, lvm_target)
     except Exception:
         # Exception rather than a bare except: KeyboardInterrupt and
         # SystemExit must not be swallowed and logged as download errors.
         logger.error("During GitSave: %s: PYSFTP download error: %s",
                      commentStr, format_exc())
         return

     try:
         repo = Repo(self.iscsiconfdir)
         filelist = [f for f in listdir(self.iscsiconfdir)
                     if isfile(join(self.iscsiconfdir, f))]
         repo.stage(filelist)
         repo.do_commit(commentStr)
     except Exception:
         logger.error("During GitSave: %s: Git save error: %s",
                      commentStr, format_exc())
예제 #13
0
파일: test_branch.py 프로젝트: tpow/breezy
    def test_last_revision_info(self):
        """last_revision_info() reports revno 2 and the id of the second commit."""
        self.simple_commit_a()
        self.build_tree(['b'])

        git_repo = GitRepo(".")
        git_repo.stage("b")
        second_commit = git_repo.do_commit(
            b"b", committer=b"Somebody <*****@*****.**>")

        branch = Branch.open('.')
        expected = (2, default_mapping.revision_id_foreign_to_bzr(second_commit))
        self.assertEqual(expected, branch.last_revision_info())
예제 #14
0
파일: commit.py 프로젝트: Swizec/OS2
def commit(root, path, author):
    """Stage ``path`` in the repository at ``root`` and commit it.

    The commit is stamped with the current time at a fixed ``-0200`` offset;
    the author timestamp is the ctime of the committed file. Returns the
    value of ``do_commit``.
    """
    repo = Repo(root)
    repo.stage([path])

    committed_at = int(time())
    tz_offset = parse_timezone('-0200')[0]
    authored_at = os.path.getctime(os.path.join(root, path))

    return repo.do_commit(
        'Automated commit',
        committer='Git-dropbox',
        author=author,
        commit_timestamp=committed_at,
        commit_timezone=tz_offset,
        author_timestamp=authored_at,
        encoding='UTF-8')
예제 #15
0
파일: commit.py 프로젝트: Swizec/OS2
def commit(root, path, author):
    """Commit a single file of the repository located at ``root``.

    ``path`` is staged and committed as 'Automated commit' by 'Git-dropbox'
    on behalf of ``author``, using the current time with a ``-0200`` offset
    and the file's ctime as author timestamp. Returns what ``do_commit``
    returns.
    """
    repo = Repo(root)
    repo.stage([path])

    file_on_disk = os.path.join(root, path)
    options = dict(
        committer='Git-dropbox',
        author=author,
        commit_timestamp=int(time()),
        commit_timezone=parse_timezone('-0200')[0],
        author_timestamp=os.path.getctime(file_on_disk),
        encoding='UTF-8',
    )
    return repo.do_commit('Automated commit', **options)
예제 #16
0
	def dulwichCommit(self, filePath, fullPath, kind):
		"""Stage ``filePath`` in the autogit repository and commit when needed.

		Commits if the repository has no HEAD, or if the tree written from
		the index differs from the tree of ``HEAD``; otherwise returns None.
		``fullPath`` is accepted but not used by this method.
		"""
		repo = Repo(AUTOGIT_PATH)
		to_stage = map(str, [filePath])
		repo.stage(to_stage)

		index = repo.open_index()

		try:
			identity = repo._get_user_identity()
		except ValueError:
			# No usable identity: use a fixed committer name instead.
			identity = "autogit"

		def make_commit():
			return repo.do_commit('%s - autogit commit (via dulwich)' % kind, committer=identity)

		try:
			repo.head()
		except KeyError:
			# No HEAD yet, so there is nothing to compare with.
			return make_commit()

		index_tree = index.commit(repo.object_store)
		differences = list(tree_changes(repo, index_tree, repo['HEAD'].tree))
		if not differences:
			return None
		return make_commit()
예제 #17
0
class GitStore(FileStore):
    '''File store frontend whose directory is versioned with git.'''

    init = 'git://'

    def __init__(self, engine, **kw):
        '''Set up the file store, then open or create the git repository.'''
        super(GitStore, self).__init__(engine, **kw)
        try:
            repo = Repo(self._dir)
        except NotGitRepository:
            repo = Repo.init(self._dir)
        self._repo = repo

    def __setitem__(self, key, value):
        '''Store the value, then stage and commit the file backing ``key``.'''
        super(GitStore, self).__setitem__(key, value)
        staged_name = quote_plus(key)
        repo = self._repo
        repo.stage([staged_name])
        message = 'added {0}'.format(staged_name)
        repo.do_commit(message, committer='shove')

    def __delitem__(self, key):
        '''Delete the value, then stage and commit the removal.'''
        super(GitStore, self).__delitem__(key)
        staged_name = quote_plus(key)
        repo = self._repo
        repo.stage([staged_name])
        message = 'removed {0}'.format(staged_name)
        # NOTE(review): unlike __setitem__, no committer is passed here;
        # confirm this difference is intentional.
        repo.do_commit(message)
예제 #18
0
class GitStore(FSStore):
    """Filesystem store that records every change in a git repository."""

    __slots__ = ("_autocommit", "_repo")

    # Identity used for commits when the caller does not supply one.
    author = "GitStore <git@indicium>"

    def __init__(self, path=".", extension=".data", autocommit=True):
        """Open the repository at the store path, creating it when absent."""
        super(GitStore, self).__init__(path, extension)
        self._autocommit = autocommit
        has_git_dir = P.isdir(P.join(self._path, ".git"))
        self._repo = Repo(self._path) if has_git_dir else Repo.init(self._path)

    def commit(self, message, author=None):
        """Commit the staged changes; ``author`` defaults to ``self.author``."""
        full_message = message + "\n\nCommitted by indicium.git.GitStore"
        identity = (self.author if author is None else author).encode()
        self._repo.do_commit(
            committer=identity,
            author=identity,
            message=full_message.encode())

    def put(self, key, value):
        """Store the value and stage its file; commit when autocommit is on."""
        super(GitStore, self).put(key, value)
        self._repo.stage([self.path_for_key(key)])
        if not self._autocommit:
            return
        self.commit("put: {!s}".format(normalize(key)))

    def delete(self, key):
        """Delete the key's file if present and stage (and maybe commit) it."""
        path = self.path_for_key(key)
        if P.exists(P.join(self._path, path)):
            super(GitStore, self).delete(key)
            self._repo.stage([path])
            if self._autocommit:
                self.commit("delete: {!s}".format(normalize(key)))
예제 #19
0
class GitStore(FileStore):

    """Git versioned frontend on top of the filesystem object store."""

    init = "git://"

    def __init__(self, engine, **kw):
        """Initialise the file store and attach a git repository to its dir."""
        super(GitStore, self).__init__(engine, **kw)
        try:
            opened = Repo(self._dir)
        except NotGitRepository:
            # Not a repository yet: create one in place.
            opened = Repo.init(self._dir)
        self._repo = opened

    def __setitem__(self, key, value):
        """Write the item, then commit the file that backs it."""
        super(GitStore, self).__setitem__(key, value)
        fname = quote_plus(key)
        self._repo.stage([fname])
        commit_message = "added {0}".format(fname)
        self._repo.do_commit(commit_message, committer="shove")

    def __delitem__(self, key):
        """Remove the item, then commit the removal of its file."""
        super(GitStore, self).__delitem__(key)
        fname = quote_plus(key)
        self._repo.stage([fname])
        commit_message = "removed {0}".format(fname)
        # NOTE(review): no committer is given here although __setitem__
        # passes "shove"; confirm whether that is deliberate.
        self._repo.do_commit(commit_message)
 def test_normal_from_repo(self):
     """get_source_revision() returns the first 10 characters of HEAD's id."""
     # Open the repository in the current directory, creating it if needed.
     folder = Path.cwd()
     try:
         rp = Repo(str(folder))
     except NotGitRepository:
         rp = Repo.init(str(folder))

     try:
         head_id = rp.head()
     except KeyError:
         # No commit yet: create one so there is a revision to report.
         file_name = 'file_test.txt'
         (folder / file_name).touch()
         rp.stage(file_name.encode())
         version = rp.do_commit(b'Test commit').decode()
     else:
         version = head_id.decode()
         self.original_revision = version

     v = get_source_revision()
     assert v == version[:10]
예제 #21
0
파일: views.py 프로젝트: asaladin/pygiwi
def do_commit(request):
    """Save an edited wiki page and commit it to the project's git repository.

    Reads ``project`` and ``page`` from ``request.matchdict`` and ``content``
    and ``lastcommitid`` from ``request.POST``. The page file is the first
    file under the project directory whose name is ``page`` plus any
    extension.

    The new content is written and committed only when the file has no
    previous commit or its last commit id still equals ``lastcommitid``.

    Raises RuntimeError when the file was committed in the meantime, and
    IndexError when no file matches the page name.
    """
    project = request.matchdict['project']
    page = request.matchdict['page']
    content = request.POST["content"]

    #construction of the wiki path
    wikiroot = request.registry.settings['wiki.root']  #from settings in .ini file.
    wikipath = os.path.join(wikiroot, project) #project name is the name of the git repository
    rootfilepath = os.path.join(wikipath, page) #we want one specific file into this directory

    files = glob.glob(rootfilepath+".*")  #list files with any extension
    log.debug(files)
    f = files[0]   #we take the first matching file, undetermined results if two files only differ by extension

    # Open without truncating ("r+", not "w"): the existing content must
    # survive when the edit is rejected below.
    handle = open(f, "r+")
    try:
        fcntl.lockf(handle, fcntl.LOCK_EX)  #acquire a file lock for the opened file

        repo = Repo(wikipath)

        #filename relative to wikipath (ie subdirectory/file.wiki)
        filename_relative_to_wiki = os.path.relpath(f, wikipath).encode("ascii")

        commit_id = get_last_commit_id(repo, filename_relative_to_wiki)
        log.debug("last commit id is %s and post['lastcommitid'] is %s"%(commit_id, request.POST['lastcommitid']))
        log.debug(commit_id == request.POST['lastcommitid'])
        #is it a new file or is this file unchanged since form generation?
        if commit_id is not None and commit_id != request.POST['lastcommitid']:
            raise RuntimeError("file %s was changed before the commit"%f)

        #no concurrent change, so go on and do the commit
        userinfos = get_user_infos(request)
        # Replace the old content only now that the edit is accepted.
        handle.seek(0)
        handle.truncate()
        handle.write(content.encode('utf-8'))

        handle.flush()
        repo.stage([filename_relative_to_wiki])
        log.debug("staging %s"%filename_relative_to_wiki)

        rep=repo.do_commit("edited online with pygiwi", committer="%(name)s <%(email)s>"%userinfos)
        log.debug("commit anwser is: " + rep)
        log.debug('wrote new content to file: %s '%f)
    finally:
        handle.close()  #closing the file releases the lock, also on errors
예제 #22
0
파일: dotissue.py 프로젝트: lqez/dotissue
def cmd_new(args, config):
    """Create a new issue on the dotissue branch of the repo at ``args.path``.

    When ``args.title`` is empty, ``$EDITOR`` (default ``vim``) is opened on
    a temporary file and its content becomes the title. The title is stored
    as a ``_title`` blob inside a new sub-tree that is added to the branch
    tree under the blob's id, and the result is committed to the dotissue
    branch.

    Always terminates the process through ``sys.exit``: with an error message
    when the repository is invalid or not initialised, with status 0 on
    success. ``config`` is not used here.
    """
    try:
        repo = Repo(args.path)
    except NotGitRepository:
        sys.exit('It does not look like a valid repository.')

    if DI_BRANCH not in get_branch_list(repo):
        sys.exit('Not initialized by dotissue. Use init first.')

    if not args.title:
        editor = os.environ.get('EDITOR', 'vim')
        initial_message = "<Issue title here>"

        with tempfile.NamedTemporaryFile(suffix=".tmp") as msgfile:
            msgfile.write(initial_message)
            msgfile.flush()
            call([editor, msgfile.name])

            # Re-open by name: the editor may have replaced the file content.
            with open(msgfile.name, 'r') as f:
                args.title = f.read()

    title = args.title.strip()
    object_store = repo.object_store
    branch_ref = 'refs/heads/%s' % DI_BRANCH
    tree = repo[repo[branch_ref].tree]

    tree_issue = Tree()

    # Octal modes use the 0o prefix, which is valid on Python 2.6+ and 3;
    # the bare leading-zero form is a syntax error on Python 3.
    blob = Blob.from_string(title)
    tree_issue.add("_title", 0o100644, blob.id)
    object_store.add_object(blob)

    # The issue directory is named after the id of its title blob.
    tree.add(blob.id, 0o040000, tree_issue.id)

    object_store.add_object(tree_issue)
    object_store.add_object(tree)

    msg = (title[:60] + '..') if len(title) > 60 else title
    commit = repo.do_commit("New issue: %s" % msg,
                            commit_timezone=-timezone,
                            tree=tree.id,
                            ref=branch_ref)

    print('Issue created : %s' % commit)
    sys.exit(0)
예제 #23
0
파일: dotissue.py 프로젝트: lqez/dotissue
def cmd_init(args, config):
    """Create the dotissue branch in the repository at ``args.path``.

    The branch starts with a single commit whose tree holds a ``README.md``
    blob. Always terminates the process through ``sys.exit``: with an error
    message when the repository is invalid or already initialised, with
    status 0 on success. ``config`` is not used here.
    """
    try:
        repo = Repo(args.path)
    except NotGitRepository:
        sys.exit('It does not look like a valid repository.')

    if DI_BRANCH in get_branch_list(repo):
        sys.exit('Already initialized.')

    object_store = repo.object_store

    tree = Tree()

    # Octal modes use the 0o prefix, which is valid on Python 2.6+ and 3;
    # the bare leading-zero form is a syntax error on Python 3.
    blob = Blob.from_string("This is a branch for dotissue.\n")
    tree.add("README.md", 0o100644, blob.id)
    object_store.add_object(blob)

    # Disabled layout with separate issue/label directories, kept for
    # reference:
    #
    #    tree_issues = Tree()
    #    tree_labels = Tree()
    #
    #    blob = Blob.from_string("This is a directory for issues.\n")
    #    tree_issues.add("README.md", 0o100644, blob.id)
    #    object_store.add_object(blob)
    #
    #    blob = Blob.from_string("This is a directory for labels.\n")
    #    tree_labels.add("README.md", 0o100644, blob.id)
    #    object_store.add_object(blob)
    #
    #    tree.add(DI_ISSUES, 0o040000, tree_issues.id)
    #    tree.add(DI_LABELS, 0o040000, tree_labels.id)
    #
    #    object_store.add_object(tree_issues)
    #    object_store.add_object(tree_labels)
    object_store.add_object(tree)

    commit = repo.do_commit("Initial commit",
                            commit_timezone=-timezone,
                            tree=tree.id,
                            ref='refs/heads/%s' % DI_BRANCH)

    print('Initialized by %s. :^D' % commit)
    sys.exit(0)
예제 #24
0
class Wiki(HookMixin):
    # Class-level defaults; ``path`` and ``repo`` are set per instance.
    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the git repository at ``path``, creating it when missing."""
        try:
            opened = Repo(path)
        except NotGitRepository:
            opened = Repo.init(path, mkdir=True)
        self.repo = opened

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        Unicode values are encoded as UTF-8 before use.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: result of the repository's ``do_commit``
        """
        def as_utf8(text):
            return text.encode('utf-8') if isinstance(text, unicode) else text

        print("commit")
        name = as_utf8(name)
        email = as_utf8(email)
        message = as_utf8(message)
        identity = "%s <%s>" % (name, email)
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=identity,
                                   author=identity)

    def push(commit, approved_by):
        """Run git pull, rebase and push for ``commit`` via subprocess.

        ``approved_by`` is not used.

        NOTE(review): the signature has no ``self``, so a call on an instance
        binds the instance to ``commit`` -- confirm how this is meant to be
        called.
        NOTE(review): "rebase -i <commit>" and "push origin <commit>:master"
        are each passed to git as a single argument, and ":x" is executed as
        a program of its own -- this looks unintended; confirm the intended
        command sequence before relying on this method.
        """
        print(commit)

        print(subprocess.check_output(["git", "pull"]))

        print(subprocess.check_output(["git", "rebase -i " + commit]))

        print(subprocess.check_output([":x"]))

        print(subprocess.check_output(
            ["git", "push origin " + commit + ":master"]))

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: WikiPage built for ``name`` at ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- one dict per index entry with the keys name,
            filename, ctime, mtime, sha and size

        """
        index = self.repo.open_index()

        def describe(filename):
            return dict(name=filename_to_cname(filename),
                        filename=filename,
                        ctime=index[filename].ctime[0],
                        mtime=index[filename].mtime[0],
                        sha=index[filename].sha,
                        size=index[filename].size)

        return [describe(filename) for filename in index]
예제 #25
0
파일: store.py 프로젝트: jackscott/herodb
class Store(object):
    """
    A simple key/value store using git as the backing store.

    Keys are slash separated paths such as 'a/b/c'.  Values are written as
    blobs produced by the serializer; a key that resolves to a git tree is
    returned as a nested dict.
    """

    def __init__(self, id, repo_path, serializer=None):
        """Open the existing repository at ``repo_path``.

        :param id: Identifier of this store (used by ``_head_cache_key``)
        :param repo_path: Path of an existing repository
        :param serializer: Object providing ``loads``/``dumps``; ``json`` when omitted
        :raises ValueError: if ``repo_path`` does not exist
        """
        self.id = id
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer
        # Re-entrant because locked methods call other locked methods
        # (e.g. put -> delete, create_branch -> branch_head).
        self.lock = threading.RLock()

    def gc(self):
        """Run ``git gc --auto`` on the repository when a git binary is found.

        The repository object is re-opened afterwards.  A failing gc is
        logged and otherwise ignored.
        """
        with self.lock:
            if which('git'):
                repo_dir = self.repo.path
                try:
                    log.info("starting gc on repo %s" % repo_dir)
                    subprocess.check_call("git gc --auto", cwd=repo_dir, shell=True)
                    log.info("finished gc on repo %s" % repo_dir)
                    self.repo = Repo(self.repo.path)
                except subprocess.CalledProcessError:
                    log.exception("git gc failed for repo %s" % repo_dir)

    def create_branch(self, branch, parent=None):
        """Create ``branch`` pointing at ``parent`` unless it already exists.

        :param branch: Branch name, with or without the 'refs/heads/' prefix
        :param parent: Commit sha to start from; head of 'master' when omitted
        :return: dict with the key 'sha' holding the head of ``branch``
        """
        with self.lock:
            if not parent:
                parent = self.branch_head('master')
            branch_ref = self._branch_ref_name(branch)
            self.repo.refs.add_if_new(branch_ref, parent)
            return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None, committer=None):
        """Apply the tree differences of ``source_branch`` onto ``target_branch``.

        Added and modified entries are copied into the target tree, deleted
        entries are removed.  Copies, renames and unchanged entries are
        ignored.  The result is committed on ``target_branch`` with the head
        of ``source_branch`` as merge head.

        :param source_branch: Branch to take the changes from
        :param target_branch: Branch that receives the merge commit
        :param author: Passed through to ``do_commit``
        :param committer: Passed through to ``do_commit``
        :raises ValueError: if both branch names are equal
        :return: dict with the key 'sha' holding the new commit sha
        """
        with self.lock:
            if source_branch == target_branch:
                raise ValueError("Cannot merge branch with itself %s" % source_branch)
            target_tree = self._get_object(ROOT_PATH, target_branch)
            branch_tree = self._get_object(ROOT_PATH, source_branch)
            for tc in diff_tree.tree_changes(self.repo.object_store, target_tree.id, branch_tree.id):
                if tc.type in (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY):
                    self._add_tree(target_tree, ((tc.new.path, tc.new.sha, tc.new.mode),))
                elif tc.type == diff_tree.CHANGE_DELETE:
                    # NOTE(review): _delete rebuilds the root tree from the
                    # objects looked up for target_branch, so entries added
                    # to target_tree earlier in this loop may be dropped
                    # here -- confirm.
                    target_tree = self._delete(tc.old.path, target_branch)
            msg = "Merge %s to %s" % (source_branch, target_branch)
            merge_heads = [self.branch_head(source_branch)]
            sha = self.repo.do_commit(
                tree=target_tree.id,
                message=msg,
                ref=self._branch_ref_name(target_branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Get a tree or blob from the store by key.  The key param can be paths such as 'a/b/c'.
        If the key requested represents a Tree in the git db, then a document will be
        returned in the form of a python dict.  If the key requested represents a Blob
        in the git db, then the deserialized blob content will be returned.

        :param key: The key to retrieve from the store
        :param shallow: When true only one level below ``key`` is expanded
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; head of ``branch`` when omitted
        :return: Deserialized value for a Blob, a dict (with an extra
                 'commit_sha' entry) for a Tree, None when the key is not found
        """
        if not commit_sha:
            commit_sha = self.branch_head(branch)
        obj = self._get_object(key, branch, commit_sha)
        if obj:
            if isinstance(obj, Blob):
                return self.serializer.loads(obj.data)
            elif isinstance(obj, Tree):
                keys = key.split('/')
                min_level = len(filter(None, keys))
                if shallow:
                    max_level = min_level+1
                else:
                    max_level = sys.maxint
                tree = self.trees(key, min_level=min_level, max_level=max_level, branch=branch, commit_sha=commit_sha)
                # trees() returns a dict rooted at the store root; descend to
                # the requested key.
                if keys != [ROOT_PATH]:
                    for k in keys:
                        tree = tree[k]
                tree['commit_sha'] = commit_sha
                return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None, bypass_head_cache=True):
        """Return the git object stored under ``key`` or None.

        :param key: Path to look up inside the commit's tree
        :param branch: Branch whose head is used when ``commit_sha`` is not given
        :param commit_sha: Commit to read from
        :param bypass_head_cache: Unused
        :return: The object, or None on KeyError / NotTreeError
        """
        try:
            if not commit_sha:
                commit_sha = self.branch_head(branch)
            (mode, sha) = tree_lookup_path(self.repo.get_object, self._repo_tree(commit_sha), key)
            return self.repo[sha]
        except KeyError:
            return None
        except NotTreeError:
            return None

    def diff(self, old_sha, new_sha=None):
        """Show the changed entries between OLD_SHA and NEW_SHA

        If NEW_SHA is not set, the head of 'master' is used.

        :param old_sha: parent commit's sha
        :param new_sha: another sha, defaults to the head of 'master'
        :retval: dict mapping each change type to a list with one item per
                 change; every item is a list of (key, value) tuples
        """
        orig = self._get_object(ROOT_PATH, commit_sha=old_sha)
        if new_sha:
            new = self._get_object(ROOT_PATH, commit_sha=new_sha)
        else:
            new = self._get_object(ROOT_PATH)

        out = defaultdict(list)
        for change_tree in diff_tree.tree_changes(self.repo.object_store, orig.id, new.id, want_unchanged=False):
            if change_tree.type.lower() == "delete" and change_tree.old.path:
                # if the change was a delete, we have no tree or blob to yield so return key with no value
                # return in the same type of structure for consistency
                out[change_tree.type].append([(change_tree.old.path, None)])
            else:
                out[change_tree.type].append(
                    [entry for entry in self.entries(change_tree.new.path) if entry])
        return out


    def put(self, key, value, flatten_keys=True, branch='master', author=None, committer=None, overwrite=False):
        """
        Add/Update one or more key value pairs in the store.  When ``value`` is a
        dict and ``flatten_keys`` is set, it is flattened into one entry per leaf.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Flatten ``{key: value}`` with ``flatten`` before storing
        :param branch: Branch to commit to
        :param author: Passed through to ``do_commit``
        :param committer: Passed through to ``do_commit``
        :param overwrite: Delete existing entries under ``key`` that are not
                          part of the new value
        :return: dict with the key 'sha' holding the new commit sha
        """
        with self.lock:
            e = {key: value}
            if flatten_keys:
                e = flatten(e)
            root_tree = self._get_object(ROOT_PATH, branch)
            merge_heads = []
            if not root_tree:
                # Nothing found for the branch: start from master's tree and
                # record master's head as merge head.
                root_tree = self._get_object(ROOT_PATH)
                merge_heads = [self.branch_head('master')]
            blobs=[]
            msg = ''
            existing_obj = None
            if isinstance(value, dict):
                try:
                    existing_obj = self.get(key, shallow=True, branch=branch)
                except Exception:
                    # Best effort: any lookup failure means "nothing stored
                    # yet".  Unlike a bare except this lets KeyboardInterrupt
                    # and SystemExit through.
                    existing_obj = None
            if existing_obj:
                if 'commit_sha' in existing_obj:
                    del existing_obj['commit_sha']
                existing_obj = flatten({key: existing_obj})
            for (k, entry_value) in e.iteritems():
                blob = Blob.from_string(self.serializer.dumps(entry_value))
                self.repo.object_store.add_object(blob)
                blobs.append((k, blob.id, stat.S_IFREG))
                if existing_obj and k in existing_obj:
                    # Only mention keys whose value actually changed.
                    if existing_obj[k] != entry_value:
                        msg += "Put %s\n" % k
                    del existing_obj[k]
                else:
                    msg += "Put %s\n" % k
            if overwrite and existing_obj:
                # Whatever is left in existing_obj was not part of the new value.
                for k in existing_obj:
                    self.delete(k, branch=branch)
                    root_tree = self._get_object(ROOT_PATH, branch)

            root_id = self._add_tree(root_tree, blobs)
            sha = self.repo.do_commit(
                tree=root_id, message=msg,
                ref=self._branch_ref_name(branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Delete one or more entries from the store.  The key param can refer to either
        a Tree or Blob in the store.  If it refers to a Blob, then just that entry will be
        removed.  If it refers to a Tree, then that entire subtree will be removed.

        :param key: The key to remove from the store.
        :param branch: Branch to commit the deletion to
        :param author: Passed through to ``do_commit``
        :param committer: Passed through to ``do_commit``
        :return: dict with the key 'sha' holding the new commit sha
        """
        with self.lock:
            tree = self._get_object(key, branch)
            merge_heads = []
            delete_branch = branch
            if not tree:
                # Key not found on the branch: operate on master's tree and
                # record master's head as merge head.
                merge_heads = [self.branch_head('master')]
                delete_branch = 'master'
            root = self._delete(key, delete_branch)
            sha = self.repo.do_commit(
                tree=root.id,
                message="Delete %s" % key,
                ref=self._branch_ref_name(branch),
                merge_heads=merge_heads,
                author=author,
                committer=committer
            )
            return {'sha': sha}

    def _delete(self, key, branch='master'):
        """Remove ``key`` from the branch's trees and store the rewritten trees.

        Nothing is committed here.

        :param key: Key to remove; a falsy key empties the root tree
        :param branch: Branch whose trees are rewritten
        :return: The rewritten root tree
        """
        trees={}
        path = key
        if path:
            # Load every ancestor tree of the key, down to the root.
            while path:
                (path, name) = pathsplit(path)
                trees[path] = self._get_object(path, branch)
        else:
            trees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        (path, name) = pathsplit(key)
        if name:
            del trees[path][name]
        else:
            for entry in trees[path].iteritems():
                del trees[path][entry.path]
        if path:
            # Walk back up so every parent points at its rewritten child.
            while path:
                (parent_path, name) = pathsplit(path)
                trees[parent_path].add(name, stat.S_IFDIR, trees[path].id)
                self.repo.object_store.add_object(trees[path])
                path = parent_path
            self.repo.object_store.add_object(trees[ROOT_PATH])
        else:
            self.repo.object_store.add_object(trees[ROOT_PATH])
        return trees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the tree sha of the commit ``commit_sha``."""
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, filter_by=None, branch='master', commit_sha=None):
        """
        Returns a list of keys from the store.  The path param can be used to scope the
        request to return keys from a subset of the tree.  The filter_by param can be used
        to control whether to return keys for Blob nodes, Tree nodes or all nodes.  Default
        is to return all node keys from the root of the store.

        :param path: The starting point retrieve key paths from.  Defaults to
        ROOT_PATH.
        :param pattern: Compiled regex; only matching paths are returned
        :param min_level: See ``iteritems``
        :param max_level: See ``iteritems``
        :param depth_first: See ``iteritems``
        :param filter_by: Either 'blob', 'tree' or None.  Controls what type of node key
        paths to return.  Default is None which returns all node type key paths
        :param branch: The branch name to return key paths for.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A list of keys in traversal order.
        """
        if filter_by == 'blob':
            wanted = Blob
        elif filter_by == 'tree':
            wanted = Tree
        else:
            wanted = None
        items = self.iteritems(path, pattern, min_level, max_level, depth_first, branch, commit_sha)
        return [key for (key, obj) in items
                if wanted is None or isinstance(obj, wanted)]

    def entries(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, branch='master', commit_sha=None):
        """Yield ``(key, value)`` for every Blob visited by ``iteritems``.

        The value is the blob content run through ``serializer.loads``.
        """
        for key, obj in self.iteritems(path, pattern, min_level, max_level, depth_first, branch, commit_sha):
            if isinstance(obj, Blob):
                yield (key, self.serializer.loads(str(obj.data)))

    def iteritems(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, branch='master', commit_sha=None):
        """Walk the objects below ``path`` and yield ``(path, object)`` pairs.

        :param path: Key to start from
        :param pattern: Compiled regex; only paths it matches are yielded
        :param min_level: Only nodes deeper than this level are yielded (default 0)
        :param max_level: Only nodes up to this level are yielded (default unlimited)
        :param depth_first: Visit children before siblings when true
        :param branch: Branch to read from
        :param commit_sha: Commit to read from instead of the branch head

        NOTE(review): children at or above ``min_level`` are dropped before
        they are queued, so a ``min_level`` greater than the level of
        ``path`` appears to stop the traversal -- confirm.
        """
        try:
            import gevent
        except ImportError:
            # gevent is optional; without it the traversal just never yields
            # to other greenlets.
            gevent = None
        def _node(level, path, node):
            return level, path, node
        root = self._get_object(path, branch=branch, commit_sha=commit_sha)
        bypass_head_cache=False
        level = len(filter(None, path.split('/')))
        if min_level is None:
            min_level = 0
        if max_level is None:
            max_level = sys.maxint
        nodes_to_visit = collections.deque([_node(level, path, root)])
        while len(nodes_to_visit) > 0:
            # allow server to yield to other greenlets during long tree traversals
            if gevent:
                gevent.sleep(0)
            (level, path, node) = nodes_to_visit.popleft()
            if isinstance(node, Tree):
                # bypass_head_cache is passed by keyword: positionally it
                # landed in _tree_entry's ``branch`` parameter.
                children = filter(lambda child: min_level < child[0] <= max_level,
                                  map(lambda child: _node(level+1, *self._tree_entry(path, child, bypass_head_cache=bypass_head_cache)),
                                      node.iteritems()))
                if depth_first:
                    nodes_to_visit.extendleft(children)
                else:
                    nodes_to_visit.extend(children)
            if min_level < level <= max_level:
                if pattern is not None:
                    if pattern.match(path):
                        yield (path, node)
                else:
                    yield (path, node)

    def trees(self, path=ROOT_PATH, pattern=None, min_level=None, max_level=None, depth_first=True, object_depth=None, branch='master', commit_sha=None):
        """
        Returns a python dict representation of the store.  The resulting dict can be
        scoped to a particular subtree in the store with the path param, a string key
        to begin from.  The branch param is used to specify the git branch name
        to build the dict from.

        :param path: Optional string key to begin building the dict from.  Defaults to
        ROOT_PATH.
        :param pattern: Regex pattern to filter matching tree paths.
        :param min_level: See ``iteritems``
        :param max_level: See ``iteritems``
        :param depth_first: See ``iteritems``
        :param object_depth: Passed through to ``expand_tree``
        :param branch: Optional git branch name to return key paths from.
        Defaults to 'master'.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A dict represents a section of the store.
        """
        tree = {}
        for key, value in self.entries(path, pattern, min_level, max_level, depth_first, branch, commit_sha):
            expand_tree(key, value, tree, object_depth)
        return tree

    def _head_cache_key(self, key):
        """Return ``key`` prefixed with this store's id."""
        return "%s/%s" % (self.id, key)

    def _tree_entry(self, path, tree_entry, branch='master', bypass_head_cache=False):
        """Resolve a tree entry to ``(child_path, object)``.

        ``branch`` and ``bypass_head_cache`` are accepted but unused.
        """
        child_path = self._tree_entry_key(path, tree_entry)
        obj = self.repo[tree_entry.sha]
        return child_path, obj

    def _tree_entry_key(self, path, tree_entry):
        """Join ``path`` and the entry's own path with '/'."""
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        """Return ``name`` as a full 'refs/heads/...' ref name."""
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the sha the branch ``name`` points to.

        Looks the ref up in ``self.repo.refs``.
        """
        with self.lock:
            return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs, branch='master', commit_sha=None):
        """Add blobs to ``root_tree`` and store all touched trees.

        ``root_tree`` is modified in place.  Nothing is committed here.

        NOTE(review): the callers in this class never pass ``branch``, so
        existing sub trees are always looked up on 'master' -- confirm that
        this is intended for writes to other branches.

        :param root_tree: Root tree to add trees to
        :param blobs: Iterable over blob path, sha, mode entries
        :param branch: Branch used to look up existing sub trees
        :param commit_sha: Commit used to look up existing sub trees
        :return: SHA1 of the created tree.
        """
        # Nested dict mirroring the directory layout of ``blobs``; leaves are
        # (mode, sha) tuples, directories are dicts.
        trees = {"": {}}
        def add_tree(path):
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            if path:
                tree = self._get_object(path, branch=branch, commit_sha=commit_sha)
                if not tree:
                    tree = Tree()
                if not isinstance(tree, Tree):
                    # A non-tree object sits where a directory is needed:
                    # remove it and start with an empty tree.
                    self.delete(path, branch=branch)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if isinstance(entry, dict):
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id
        return build_tree("")
예제 #26
0
#!/usr/bin/env python3
import os, sys, socket, traceback, json, yaml, getpass
from dulwich.repo import Repo

# Open the repository at ~/.myrepo, creating the directory and an empty
# repository when they are missing.
repo_path = os.path.realpath(os.path.expanduser('~/.myrepo'))
os.makedirs(repo_path, exist_ok=True)
if os.path.exists(os.path.join(repo_path, '.git')):
    repo = Repo(repo_path)
else:
    repo = Repo.init(repo_path)

# Debug output: the repo object and the current index content.
yaml.dump(repo, sys.stdout)
index = repo.open_index()
MSG = f'  repo index path={index.path}, index list={list(index)}, '
yaml.dump(MSG, sys.stdout)

# Write a file into the work tree and stage it.  The context manager closes
# the handle even when the write fails.
with open(os.path.join(repo_path, 'foo'), 'wb') as handle:
    handle.write(b"monty1")
repo.stage([b"foo"])

print(",".join(
    [name.decode(sys.getfilesystemencoding()) for name in repo.open_index()]))

# No explicit committer is passed to do_commit.
commit_id = repo.do_commit(b"The first commit")
print(f'    commit_id={commit_id},     repo_head = {repo.head()}   ')

#repo = Repo("myrepo")
예제 #27
0
파일: repo.py 프로젝트: licode/VisTrails
class GitRepo(object):
    """Wrapper around a dulwich ``Repo`` that reads objects out of the
    top-level tree of a commit and commits files from the working directory.

    Paths of temporary files and directories created by the ``get_*``
    methods are collected in ``temp_persist_files``.
    """

    def __init__(self, path):
        """Open the repository at ``path``, initialising one if needed.

        :param path: Repository directory; created when it does not exist
        :raises IOError: if ``path`` exists but is not a directory
        """
        if os.path.exists(path) and not os.path.isdir(path):
            raise IOError('Git repository "%s" must be a directory.' %
                          path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist; the second argument asks dulwich to
            # create the directory only when it is missing
            self.repo = Repo.init(path, not os.path.exists(path))

        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit object for ``version``.

        :raises NotCommitError: if ``version`` resolves to a non-commit
        """
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        """Return ``"tree"`` or ``"blob"`` for the entry ``name``.

        :param name: Entry name in the top-level tree of the commit
        :param version: Commit to inspect
        :raises KeyError: if ``name`` is not in the tree
        """
        commit = self._get_commit(version)

        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        # tree[name] is indexed as (mode, sha)
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        else:
            return "blob"

    def get_path(self,
                 name,
                 version="HEAD",
                 path_type=None,
                 out_name=None,
                 out_suffix=''):
        """Write the entry ``name`` to disk and return the output path.

        :param name: Entry name in the top-level tree of the commit
        :param version: Commit to read from
        :param path_type: ``'tree'`` or ``'blob'``; detected when None
        :param out_name: Output path; a temporary one is created when None
        :param out_suffix: Suffix for a generated temporary name
        :raises TypeError: if ``path_type`` is neither ``'tree'`` nor ``'blob'``
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)

        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """Write the blob ``blob_sha`` to ``out_fname`` and return that path.

        Without ``out_fname`` a temporary file is created and remembered in
        ``temp_persist_files``; otherwise missing parent directories are
        created.
        """
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, out_suffix=''):
        """Write the blob ``name`` of ``version`` to disk; return its path.

        :raises KeyError: if ``name`` is not in the commit's tree
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        out_fname = self._write_blob(blob_sha, out_fname, out_suffix)
        return out_fname

    def get_dir(self, name, version="HEAD", out_dirname=None, out_suffix=''):
        """Write the content of the tree ``name`` below ``out_dirname``.

        Without ``out_dirname`` a temporary directory is created and
        remembered in ``temp_persist_files``.

        :raises KeyError: if ``name`` is not in the commit's tree
        :return: The output directory
        """
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """Return the sha stored for the entry ``name`` in ``version``.

        ``path_type`` is accepted but unused.

        :raises KeyError: if ``name`` is not in the commit's tree
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1 << 16):
        """Hash the file ``fname`` as a blob, reading it in chunks.

        :param fname: File to hash
        :param chunk_size: Number of bytes read per chunk
        :return: Result of ``iter_sha1`` over the object header and content
        """
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            # The file is opened in binary mode, so end of file is b''.
            # On Python 2 that equals ''; on Python 3 the '' sentinel never
            # matches and the iterator would not terminate.
            chunks = iter(lambda: f.read(chunk_size), b'')
            return iter_sha1(chain([head], chunks))

    @staticmethod
    def compute_tree_hash(dirname):
        """Hash the directory ``dirname`` as a tree, recursively.

        Entries that are neither a directory nor a regular file are skipped.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR  # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """Hash ``path`` as a tree (directory) or as a blob (file).

        :raises TypeError: if ``path`` is neither a directory nor a file
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit the walker yields for ``path``.

        The walk starts at the repository head and is limited to one entry.
        """
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head],
                        max_entries=1,
                        paths=[path])
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """Stage ``filename`` (relative to the repo), recursing into directories.

        Symbolic links are not staged; a warning is emitted instead.
        """
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # use '/' separators regardless of the platform's separator
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage ``filename`` and commit it; return the commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """Make sure ``user.name`` and ``user.email`` are configured.

        When either is missing from the config stack, both are written to
        the repository config using the current user.
        """
        config_stack = self.repo.get_config_stack()

        try:
            config_stack.get(('user', ), 'name')
            config_stack.get(('user', ), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user', ), 'name', user)
            repo_conf.set(('user', ), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
예제 #28
0
class GitStorage(object):
    """Git backed storage built on a dulwich ``Repo``.

    NOTE(review): several methods call ``repo._commit`` / ``_get_parents``
    and then read ``.tree`` on the results -- confirm these match the
    dulwich version in use.
    """

    def _ignoreFile(self, dirName, fileName):
        """
        Callback passed as ``ignore`` to ``shutil.copytree``
        ``dirName``
            the working directory
        ``fileName``
            list of files inside the directory (dirName)

        Returns ``dirName + name`` for every name whose joined path is not
        in the module level ``fileToIgnore`` list.

        NOTE(review): copytree expects the plain names to *skip*; this
        returns joined paths of the entries that are not listed -- confirm
        the intent.
        """
        result = []
        for i in fileName:
            path = dirName + i
            if path not in fileToIgnore:
                result.append(path)
        return result

    def _commit(self, tree):
        """
        commit a tree used only by the init
        ``tree``
            tree to commit

        Stores the tree and a new commit object, points HEAD at the commit
        and returns the commit id.
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('commiting')
        return commit.id

    def __init__(self, path):
        """Open the repository at ``path`` or create it with an empty commit.

        An existing directory is taken to be a repository.
        """
        #creating the repository
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    #only this function will be used on upload
    def change(self, files):
        """
        stage and commit a list of files; can also be called for the first upload
        ``files``
            a list of file to change
        """
        if not files:
            log.debug("trying to change nothing will do... nothing")
        else:
            log.debug("this will change %i files" % (len(files)))
            for f in files:
                self.repo.stage(str(f))
            log.debug("stages dones")
            self.repo.do_commit("this is so awesome that nobody will never see it",
                committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """
        creating files from the diff between 2 trees, it will be used in the code browser to get older version
        (walking on history)
        ``tree``
            the tree that you want to compare to, by default the tree of
            the first parent of the head commit
        ``dest``
            the destination folder to build sources, appended to the
            repository path as is
        ``originalTree``
            the original Tree, by default it's the last one

        by default it builds the files of the previous commit
        """
        if tree is None:
            head = self.repo.commit(self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(self.repo.commit(self.repo.head()).tree)
        blobToBuild = []
        #getting blob that have changed
        # NOTE(review): ``blob`` is a whole tree entry here, not a name --
        # confirm that the membership test compares what is intended.
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)
        # Ask the repo for its path instead of cutting it out of repr(),
        # which breaks for paths containing a quote.
        repoLocation = self.repo.path
        destination = repoLocation + dest
        #creating the folder with link to older files
        if os.path.exists(destination):
            # the message has a %s placeholder that was never filled in
            log.warning("%s already exist, copy will not work" % destination)
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation, destination, symlinks=True, ignore=self._ignoreFile)
        for b in blobToBuild:
            # os.path.split returns a tuple, which has no pop(); the
            # directory part is what is needed here.
            fileDirectory = os.path.join(destination, os.path.dirname(b.path))
            if not os.path.exists(fileDirectory):
                os.makedirs(fileDirectory)
            # raw object content: write it in binary mode and always close
            with open(os.path.join(destination, b.path), 'wb') as out:
                out.write(self.repo.get_object(b.sha).as_raw_string())

    #get*
    def getLastTree(self):
        """
        return the last tree
        """
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """
        return the ``tree`` attribute of every parent of the head commit
        """
        result = []
        commit = self.repo._commit(self.repo.head())
        for c in commit._get_parents():
            result.append(c.tree)
        return result

    def getOlderFileContent(self, file):
        """
        return the first file's content that changed from the file
        ``file``
            the file to work on

        returns an empty string when no older content is found
        """
        with open(file) as f:
            originalBlob = Blob.from_string(f.read())
        # Stays None when every tree contains the blob; the name used to be
        # unbound in that case.
        tree = None
        for t in self.getAllTrees():
            #parsing tree in order to find the tree where the file change
            if originalBlob not in t:
                tree = t
                break
        #tree must be existent, other way file is not correct
        if tree is None:
            log.error(
                "there is no tree that contain this blob this souldn't happen, other way this file does not appear to come from this package")
        else:
            # Default for the case where no parent matches below, so the
            # comparison further down is always defined.
            olderTree = tree
            if self.repo._commit(self.repo.head()).tree == tree:
                olderTree = self.repo.commit(self.repo.head())._get_parents()[0].tree
            else:
                for c in self.repo._commit(self.repo.head())._get_parents():
                    if c.tree == tree:
                        try:
                            olderTree = c.get_parents()[0]
                        except IndexError:
                            log.debug("file is the last version")
                            olderTree = tree
            if olderTree != tree:
                #we must check here the blob that contains the older file
                for b in self.repo.object_store.iter_tree_contents(olderTree.id):
                    if originalBlob.path == b.path:
                        #older blob find! awesome, in the first loop we already test if they are the same
                        # that's why we can now return the content of the file
                        return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """
        return the parents of the head commit
        """
        return self.repo.commit(self.repo.head())._get_parents()
예제 #29
0
파일: store.py 프로젝트: pombredanne/herodb
class Store(object):
    """
    A simple key/value store using git as the backing store.
    """
    def __init__(self, id, repo_path, serializer=None):
        self.id = id
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer
        self.lock = threading.RLock()

    def gc(self):
        """
        Run ``git gc --auto`` on the backing repository and reopen it.

        Does nothing when no ``git`` executable is found.  A failing git
        command is logged and otherwise ignored.
        """
        with self.lock:
            if not which('git'):
                return
            repo_dir = self.repo.path
            try:
                log.info("starting gc on repo %s" % repo_dir)
                subprocess.check_call("git gc --auto",
                                      cwd=repo_dir,
                                      shell=True)
                log.info("finished gc on repo %s" % repo_dir)
                # reopen the repository after git has run on it
                self.repo = Repo(repo_dir)
            except subprocess.CalledProcessError:
                log.exception("git gc failed for repo %s" % repo_dir)

    def create_branch(self, branch, parent=None):
        with self.lock:
            if not parent:
                parent = self.branch_head('master')
            branch_ref = self._branch_ref_name(branch)
            self.repo.refs.add_if_new(branch_ref, parent)
            return {'sha': self.branch_head(branch)}

    def merge(self,
              source_branch,
              target_branch='master',
              author=None,
              committer=None):
        """
        Apply the tree differences of source_branch onto target_branch and
        record them as a merge commit on target_branch.

        Added and modified entries are written into the target tree, deleted
        entries are removed; copies, renames and unchanged entries are
        ignored.

        :param source_branch: Branch whose changes are merged
        :param target_branch: Branch receiving the changes
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :return: A dict with the new commit sha under 'sha'
        :raises ValueError: if both branch names are equal
        """
        with self.lock:
            if source_branch == target_branch:
                raise ValueError("Cannot merge branch with itself %s" %
                                 source_branch)
            target_tree = self._get_object(ROOT_PATH, target_branch)
            branch_tree = self._get_object(ROOT_PATH, source_branch)
            upserts = (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY)
            changes = diff_tree.tree_changes(self.repo.object_store,
                                             target_tree.id, branch_tree.id)
            for tc in changes:
                if tc.type in upserts:
                    # NOTE(review): _add_tree is called with its default
                    # branch ('master'), also when target_branch differs.
                    self._add_tree(target_tree,
                                   ((tc.new.path, tc.new.sha, tc.new.mode), ))
                elif tc.type == diff_tree.CHANGE_DELETE:
                    target_tree = self._delete(tc.old.path, target_branch)
                # every other change type is intentionally a no-op
            sha = self.repo.do_commit(
                tree=target_tree.id,
                message="Merge %s to %s" % (source_branch, target_branch),
                ref=self._branch_ref_name(target_branch),
                merge_heads=[self.branch_head(source_branch)],
                author=author,
                committer=committer)
            return {'sha': sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Get a tree or blob from the store by key.  The key param can be a path
        such as 'a/b/c'.  A key pointing to a Blob yields the blob data passed
        through the serializer's ``loads``.  A key pointing to a Tree yields a
        python dict built by ``trees``, with the resolved commit sha added
        under the 'commit_sha' key.

        :param key: The key to retrieve from the store
        :param shallow: When true, only one level below the key is expanded
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; defaults to the branch head
        :return: The deserialized blob, a dict for a tree, or None when the
        lookup yields nothing truthy
        """
        if not commit_sha:
            commit_sha = self.branch_head(branch)
        obj = self._get_object(key, branch, commit_sha)
        if not obj:
            return None
        if isinstance(obj, Blob):
            return self.serializer.loads(obj.data)
        if not isinstance(obj, Tree):
            return None
        keys = key.split('/')
        min_level = len([part for part in keys if part])
        max_level = min_level + 1 if shallow else sys.maxint
        tree = self.trees(key,
                          min_level=min_level,
                          max_level=max_level,
                          branch=branch,
                          commit_sha=commit_sha)
        if keys != [ROOT_PATH]:
            # descend to the requested key inside the expanded dict
            for k in keys:
                tree = tree[k]
        tree['commit_sha'] = commit_sha
        return tree

    def _get_object(self,
                    key,
                    branch='master',
                    commit_sha=None,
                    bypass_head_cache=True):
        """
        Resolve a key to the git object stored at that path.

        :param key: Path inside the tree of the chosen commit
        :param branch: Branch whose head is used when commit_sha is not given
        :param commit_sha: Commit to read from
        :param bypass_head_cache: Not used by this implementation
        :return: The object found, or None when the lookup raises KeyError or
        NotTreeError
        """
        try:
            head_sha = commit_sha or self.branch_head(branch)
            root = self._repo_tree(head_sha)
            _mode, sha = tree_lookup_path(self.repo.get_object, root, key)
            return self.repo[sha]
        except (KeyError, NotTreeError):
            return None

    def diff(self, old_sha, new_sha=None):
        """Show the changed entries between OLD_SHA and NEW_SHA

        If NEW_SHA is not set, the head of master is used.  The result maps
        each change type to a list with one item per change: for a delete
        ``[(old_path, None)]``, otherwise the list of ``(key, value)`` entries
        read at the changed path from the new commit.

        :param old_sha: parent commit's sha
        :param new_sha: another sha, defaults to the head of master
        :retval: dict
        """
        orig = self._get_object(ROOT_PATH, commit_sha=old_sha)
        # a falsy commit_sha makes _get_object fall back to the master head
        new = self._get_object(ROOT_PATH, commit_sha=new_sha)

        out = defaultdict(list)
        for change_tree in diff_tree.tree_changes(self.repo.object_store,
                                                  orig.id,
                                                  new.id,
                                                  want_unchanged=False):
            if change_tree.type.lower() == "delete" and change_tree.old.path:
                # if the change was a delete, we have no tree or blob to yield so return key with no value
                # return in the same type of structure for consistency
                out[change_tree.type].append([(change_tree.old.path, None)])
            else:
                # read the entries from the commit being diffed, not from
                # whatever master currently points to
                out[change_tree.type].append(
                    [entry
                     for entry in self.entries(change_tree.new.path,
                                               commit_sha=new_sha)
                     if entry])
        return out

    def put(self,
            key,
            value,
            flatten_keys=True,
            branch='master',
            author=None,
            committer=None,
            overwrite=False):
        """
        Add/Update one or many key value pairs in the store.  A dict value may
        contain nested paths of objects to set; with flatten_keys it is
        flattened into individual entries first.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Whether to pass the entry through ``flatten``
        :param branch: Branch to commit to; when its root tree cannot be read
        the master root tree is used and master's head becomes a merge head
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :param overwrite: For dict values, delete previously stored entries
        under key that are absent from the new value
        :return: A dict with the new commit sha under 'sha'
        """
        with self.lock:
            e = {key: value}
            if flatten_keys:
                e = flatten(e)
            root_tree = self._get_object(ROOT_PATH, branch)
            merge_heads = []
            if not root_tree:
                root_tree = self._get_object(ROOT_PATH)
                merge_heads = [self.branch_head('master')]
            blobs = []
            msg = ''
            existing_obj = None
            if isinstance(value, dict):
                try:
                    existing_obj = self.get(key, shallow=True, branch=branch)
                except Exception:
                    # best effort: an unreadable previous value is treated
                    # as "nothing stored yet"
                    existing_obj = None
            if existing_obj:
                if 'commit_sha' in existing_obj:
                    del existing_obj['commit_sha']
                existing_obj = flatten({key: existing_obj})
            for (k, item) in e.items():
                blob = Blob.from_string(self.serializer.dumps(item))
                self.repo.object_store.add_object(blob)
                blobs.append((k, blob.id, stat.S_IFREG))
                if existing_obj and k in existing_obj:
                    # only mention keys whose value actually changed
                    if existing_obj[k] != item:
                        msg += "Put %s\n" % k
                    del existing_obj[k]
                else:
                    msg += "Put %s\n" % k
            if overwrite and existing_obj:
                # whatever is left was stored before but is not in the new value
                for k in existing_obj:
                    self.delete(k, branch=branch)
                    root_tree = self._get_object(ROOT_PATH, branch)

            root_id = self._add_tree(root_tree, blobs)
            sha = self.repo.do_commit(tree=root_id,
                                      message=msg,
                                      ref=self._branch_ref_name(branch),
                                      merge_heads=merge_heads,
                                      author=author,
                                      committer=committer)
            return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Delete one or more entries from the store and commit the result.  The
        key param can refer to either a Tree or Blob in the store.  If it
        refers to a Blob, just that entry is removed.  If it refers to a Tree,
        that entire subtree is removed.

        When the key cannot be read from the branch, the removal is computed
        against master and master's head is recorded as a merge head.

        :param key: The key to remove from the store.
        :param branch: Branch whose ref receives the commit
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :return: A dict with the new commit sha under 'sha'
        """
        with self.lock:
            if self._get_object(key, branch):
                merge_heads = []
                delete_branch = branch
            else:
                merge_heads = [self.branch_head('master')]
                delete_branch = 'master'
            new_root = self._delete(key, delete_branch)
            sha = self.repo.do_commit(tree=new_root.id,
                                      message="Delete %s" % key,
                                      ref=self._branch_ref_name(branch),
                                      merge_heads=merge_heads,
                                      author=author,
                                      committer=committer)
            return {'sha': sha}

    def _delete(self, key, branch='master'):
        """
        Remove key from the branch's tree and store the rewritten trees.

        :param key: Path to remove; an empty key empties the root tree
        :param branch: Branch whose trees are loaded
        :return: The rewritten root Tree (already added to the object store)
        """
        loaded = {}
        remaining = key
        if not remaining:
            loaded[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        # load every ancestor tree of key, up to and including the root
        while remaining:
            remaining, _unused = pathsplit(remaining)
            loaded[remaining] = self._get_object(remaining, branch)

        parent, leaf = pathsplit(key)
        if leaf:
            del loaded[parent][leaf]
        else:
            for entry in loaded[parent].iteritems():
                del loaded[parent][entry.path]

        # re-link each rewritten tree into its parent, bottom-up
        while parent:
            grandparent, name = pathsplit(parent)
            loaded[grandparent].add(name, stat.S_IFDIR, loaded[parent].id)
            self.repo.object_store.add_object(loaded[parent])
            parent = grandparent
        self.repo.object_store.add_object(loaded[ROOT_PATH])
        return loaded[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        return self.repo[commit_sha].tree

    def keys(self,
             path=ROOT_PATH,
             pattern=None,
             min_level=None,
             max_level=None,
             depth_first=True,
             filter_by=None,
             branch='master',
             commit_sha=None):
        """
        Returns a list of keys from the store, in the order ``iteritems``
        produces them.  The path param can be used to scope the request to a
        subset of the tree.  The filter_by param controls whether keys of Blob
        nodes, Tree nodes or all nodes are returned.

        :param path: The starting point to retrieve key paths from.
        :param pattern: Passed through to ``iteritems``
        :param min_level: Passed through to ``iteritems``
        :param max_level: Passed through to ``iteritems``
        :param depth_first: Passed through to ``iteritems``
        :param filter_by: Either 'blob', 'tree' or None.  Controls what type of
        node key paths to return.  Default is None which returns all of them
        :param branch: The branch name to return key paths for.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A list of keys.
        """
        if filter_by == 'blob':
            wanted = Blob
        elif filter_by == 'tree':
            wanted = Tree
        else:
            wanted = None
        found = self.iteritems(path, pattern, min_level, max_level,
                               depth_first, branch, commit_sha)
        if wanted is None:
            return [item[0] for item in found if item]
        return [item[0] for item in found if isinstance(item[1], wanted)]

    def entries(self,
                path=ROOT_PATH,
                pattern=None,
                min_level=None,
                max_level=None,
                depth_first=True,
                branch='master',
                commit_sha=None):
        """
        Generate ``(key, value)`` pairs for every Blob produced by
        ``iteritems``; value is the blob data run through the serializer's
        ``loads``.  Non-blob nodes are skipped.
        """
        nodes = self.iteritems(path, pattern, min_level, max_level,
                               depth_first, branch, commit_sha)
        for key, obj in nodes:
            if not isinstance(obj, Blob):
                continue
            yield (key, self.serializer.loads(str(obj.data)))

    def iteritems(self,
                  path=ROOT_PATH,
                  pattern=None,
                  min_level=None,
                  max_level=None,
                  depth_first=True,
                  branch='master',
                  commit_sha=None):
        """
        Generate ``(path, git_object)`` pairs while walking the tree below
        path.

        A node is yielded, and its children are visited, only when its level
        (number of non-empty path components) satisfies
        ``min_level < level <= max_level``; the starting node's children are
        always considered.

        :param path: Key to start from
        :param pattern: Optional object with a ``match`` method; only paths
        it matches are yielded
        :param min_level: Exclusive lower level bound, defaults to 0
        :param max_level: Inclusive upper level bound, defaults to sys.maxint
        :param depth_first: Children are pushed to the front of the work
        queue when true, appended to its end otherwise
        :param branch: Branch to read from when commit_sha is not given
        :param commit_sha: Commit to read from
        """
        try:
            import gevent
        except ImportError:
            # gevent is optional; without it the walk simply never yields
            # control to other greenlets
            gevent = None

        root = self._get_object(path, branch=branch, commit_sha=commit_sha)
        bypass_head_cache = False
        level = len([part for part in path.split('/') if part])
        if min_level is None:
            min_level = 0
        if max_level is None:
            max_level = sys.maxint
        nodes_to_visit = collections.deque([(level, path, root)])
        while nodes_to_visit:
            # allow server to yield to other greenlets during long tree traversals
            if gevent:
                gevent.sleep(0)
            (level, path, node) = nodes_to_visit.popleft()
            child_level = level + 1
            # all children share one level, so check the bounds once and
            # skip loading objects that would be discarded anyway
            if isinstance(node, Tree) and min_level < child_level <= max_level:
                children = []
                for child in node.iteritems():
                    child_path, child_obj = self._tree_entry(
                        path,
                        child,
                        branch=branch,
                        bypass_head_cache=bypass_head_cache)
                    children.append((child_level, child_path, child_obj))
                if depth_first:
                    nodes_to_visit.extendleft(children)
                else:
                    nodes_to_visit.extend(children)
            if min_level < level <= max_level:
                if pattern is None or pattern.match(path):
                    yield (path, node)

    def trees(self,
              path=ROOT_PATH,
              pattern=None,
              min_level=None,
              max_level=None,
              depth_first=True,
              object_depth=None,
              branch='master',
              commit_sha=None):
        """
        Returns a python dict representation of the store.  The resulting dict
        can be scoped to a particular subtree with the path param.  Every
        entry produced by ``entries`` is merged into the dict through
        ``expand_tree``.

        :param path: Optional string key to begin building the dict from.
        :param pattern: Regex pattern to filter matching tree paths.
        :param min_level: Passed through to ``entries``
        :param max_level: Passed through to ``entries``
        :param depth_first: Passed through to ``entries``
        :param object_depth: Passed through to ``expand_tree``
        :param branch: Optional git branch name to return key paths from.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A dict representing a section of the store.
        """
        result = {}
        flat = self.entries(path, pattern, min_level, max_level,
                            depth_first, branch, commit_sha)
        for key, value in flat:
            expand_tree(key, value, result, object_depth)
        return result

    def _head_cache_key(self, key):
        return "%s/%s" % (self.id, key)

    def _tree_entry(self,
                    path,
                    tree_entry,
                    branch='master',
                    bypass_head_cache=False):
        child_path = self._tree_entry_key(path, tree_entry)
        obj = None
        if obj is None:
            obj = self.repo[tree_entry.sha]
        return child_path, obj

    def _tree_entry_key(self, path, tree_entry):
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        with self.lock:
            return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs, branch='master', commit_sha=None):
        """Write blobs into root_tree and the subtrees below it.

        :param root_tree: Root tree to add trees to (modified in place)
        :param blobs: Iterable over blob path, sha, mode entries
        :param branch: Branch used to look up already existing subtrees
        :param commit_sha: Commit used to look up already existing subtrees
        :return: SHA1 of the created tree.
        """
        # directory path -> {name: nested directory dict | (mode, sha)}
        pending = {"": {}}

        def ensure_dir(dir_path):
            # return the dict for dir_path, creating missing ancestors
            existing = pending.get(dir_path)
            if existing is not None:
                return existing
            parent_path, name = pathsplit(dir_path)
            parent = ensure_dir(parent_path)
            assert isinstance(name, basestring)
            created = {}
            parent[name] = created
            pending[dir_path] = created
            return created

        for blob_path, blob_sha, blob_mode in blobs:
            dir_path, name = pathsplit(blob_path)
            ensure_dir(dir_path)[name] = (blob_mode, blob_sha)

        def write_dir(dir_path):
            # build the Tree for dir_path, store it and return its id
            if not dir_path:
                tree = root_tree
            else:
                tree = self._get_object(dir_path,
                                        branch=branch,
                                        commit_sha=commit_sha)
                if not tree:
                    tree = Tree()
                elif not isinstance(tree, Tree):
                    # a non-tree object sits where a directory is needed
                    self.delete(dir_path, branch=branch)
                    tree = Tree()
            for name, entry in pending[dir_path].iteritems():
                if isinstance(entry, dict):
                    mode = stat.S_IFDIR
                    sha = write_dir(pathjoin(dir_path, name))
                else:
                    mode, sha = entry
                tree.add(name, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return write_dir("")
예제 #30
0
파일: store.py 프로젝트: tfmorris/herodb
class Store(object):
    """
    A simple key/value store using git as the backing store.
    """

    def __init__(self, repo_path, serializer=None):
        if os.path.exists(repo_path):
            self.repo = Repo(repo_path)
        else:
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        if not serializer:
            self.serializer = json
        else:
            self.serializer = serializer

    def create_branch(self, branch, parent=None):
        if not parent:
            parent = self.branch_head('master')
        branch_ref = self._branch_ref_name(branch)
        self.repo.refs.add_if_new(branch_ref, parent)
        return {'sha': self.branch_head(branch)}

    def merge(self, source_branch, target_branch='master', author=None, committer=None):
        """
        Apply the tree differences of source_branch onto target_branch and
        record them as a merge commit on target_branch.

        Added and modified entries are written into the target tree, deleted
        entries are removed; copies, renames and unchanged entries are
        ignored.

        :param source_branch: Branch whose changes are merged
        :param target_branch: Branch receiving the changes
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :return: A dict with the new commit sha under 'sha'
        :raises ValueError: if both branch names are equal
        """
        if source_branch == target_branch:
            raise ValueError("Cannot merge branch with itself %s" % source_branch)
        target_tree = self._get_object(ROOT_PATH, target_branch)
        branch_tree = self._get_object(ROOT_PATH, source_branch)
        changes = diff_tree.tree_changes(self.repo.object_store, target_tree.id, branch_tree.id)
        for tc in changes:
            kind = tc.type
            if kind == diff_tree.CHANGE_ADD or kind == diff_tree.CHANGE_MODIFY:
                self._add_tree(target_tree, ((tc.new.path, tc.new.sha, tc.new.mode),))
            elif kind == diff_tree.CHANGE_DELETE:
                target_tree = self._delete(tc.old.path, target_branch)
            # copy / rename / unchanged: nothing to do
        commit_args = dict(
            tree=target_tree.id,
            message="Merge %s to %s" % (source_branch, target_branch),
            ref=self._branch_ref_name(target_branch),
            merge_heads=[self.branch_head(source_branch)],
            author=author,
            committer=committer,
        )
        return {'sha': self.repo.do_commit(**commit_args)}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Get a tree or blob from the store by key.  The key param can be a path
        such as 'a/b/c'.  A key pointing to a Blob yields the blob data passed
        through the serializer's ``loads``.  A key pointing to a Tree yields a
        python dict built by ``trees``.

        :param key: The key to retrieve from the store
        :param shallow: When true, the expansion depth is limited to the
        number of components in key
        :param branch: The branch name to search for the requested key
        :param commit_sha: Commit to read from; defaults to the branch head
        :return: The deserialized blob, a dict for a tree, or None when the
        lookup yields nothing truthy
        """
        obj = self._get_object(key, branch, commit_sha)
        if not obj:
            return None
        if isinstance(obj, Blob):
            return self.serializer.loads(obj.data)
        if isinstance(obj, Tree):
            keys = key.split('/')
            depth = len(keys) if shallow else None
            # expand the tree from the same commit the object was found in
            tree = self.trees(key, depth=depth, branch=branch, commit_sha=commit_sha)
            if keys != [ROOT_PATH]:
                for k in keys:
                    tree = tree[k]
            return tree
        return None

    def _get_object(self, key, branch='master', commit_sha=None):
        """
        Resolve a key to the git object stored at that path.

        :param key: Path inside the tree of the chosen commit
        :param branch: Branch whose head is used when commit_sha is not given
        :param commit_sha: Commit to read from
        :return: The object found, or None when the lookup raises KeyError
        """
        try:
            head_sha = commit_sha or self.branch_head(branch)
            root = self._repo_tree(head_sha)
            _mode, sha = tree_lookup_path(self.repo.get_object, root, key)
            return self.repo[sha]
        except KeyError:
            return None

    def put(self, key, value, flatten_keys=True, branch='master', author=None, committer=None):
        """
        Add/Update one or many key value pairs in the store.  A dict value may
        contain nested paths of objects to set; with flatten_keys it is
        flattened into individual entries first.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Whether to pass the entry through ``flatten``
        :param branch: Branch to commit to; when its root tree cannot be read
        the master root tree is used and master's head becomes a merge head
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :return: A dict with the new commit sha under 'sha'
        """
        e = {key: value}
        if flatten_keys:
            e = flatten(e)
        merge_heads = []
        root_tree = self._get_object(ROOT_PATH, branch)
        if not root_tree:
            root_tree = self._get_object(ROOT_PATH)
            merge_heads = [self.branch_head('master')]
        blobs = []
        msg_lines = []
        for (entry_key, entry_value) in e.iteritems():
            blob = Blob.from_string(self.serializer.dumps(entry_value))
            self.repo.object_store.add_object(blob)
            blobs.append((entry_key, blob.id, stat.S_IFREG))
            msg_lines.append("Put %s\n" % entry_key)
        root_id = self._add_tree(root_tree, blobs)
        sha = self.repo.do_commit(
            tree=root_id,
            message=''.join(msg_lines),
            ref=self._branch_ref_name(branch),
            merge_heads=merge_heads,
            author=author,
            committer=committer
        )
        return {'sha': sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Delete one or more entries from the store and commit the result.  The
        key param can refer to either a Tree or Blob in the store.  If it
        refers to a Blob, just that entry is removed.  If it refers to a Tree,
        that entire subtree is removed.

        When the key cannot be read from the branch, the removal is computed
        against master and master's head is recorded as a merge head.

        :param key: The key to remove from the store.
        :param branch: Branch whose ref receives the commit
        :param author: Author passed through to the commit
        :param committer: Committer passed through to the commit
        :return: A dict with the new commit sha under 'sha'
        """
        found = self._get_object(key, branch)
        if found:
            delete_branch, merge_heads = branch, []
        else:
            merge_heads = [self.branch_head('master')]
            delete_branch = 'master'
        new_root = self._delete(key, delete_branch)
        sha = self.repo.do_commit(
            tree=new_root.id,
            message="Delete %s" % key,
            ref=self._branch_ref_name(branch),
            merge_heads=merge_heads,
            author=author,
            committer=committer
        )
        return {'sha': sha}

    def _delete(self, key, branch='master'):
        """
        Remove key from the branch's tree and store the rewritten trees.

        :param key: Path to remove; an empty key empties the root tree
        :param branch: Branch whose trees are loaded
        :return: The rewritten root Tree (already added to the object store)
        """
        by_path = {}
        if key:
            # load every ancestor tree of key, up to and including the root
            cursor = key
            while cursor:
                cursor, _unused = pathsplit(cursor)
                by_path[cursor] = self._get_object(cursor, branch)
        else:
            by_path[ROOT_PATH] = self._get_object(ROOT_PATH, branch)

        parent, leaf = pathsplit(key)
        holder = by_path[parent]
        if leaf:
            del holder[leaf]
        else:
            for entry in holder.iteritems():
                del holder[entry.path]

        store = self.repo.object_store
        # re-link each rewritten tree into its parent, bottom-up
        while parent:
            grandparent, name = pathsplit(parent)
            by_path[grandparent].add(name, stat.S_IFDIR, by_path[parent].id)
            store.add_object(by_path[parent])
            parent = grandparent
        store.add_object(by_path[ROOT_PATH])
        return by_path[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        return self.repo[commit_sha].tree

    def keys(self, path=ROOT_PATH, pattern=None, depth=None, filter_by=None, branch='master', commit_sha=None):
        """
        Returns a list of keys from the store, in the order ``raw_entries``
        produces them.  The path param can be used to scope the request to a
        subset of the tree.  The filter_by param controls whether keys of Blob
        nodes, Tree nodes or all nodes are returned.

        :param path: The starting point to retrieve key paths from.
        :param pattern: Passed through to ``raw_entries``
        :param depth: Passed through to ``raw_entries``
        :param filter_by: Either 'blob', 'tree' or None.  Controls what type of
        node key paths to return.  Default is None which returns all of them
        :param branch: The branch name to return key paths for.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A list of keys.
        """
        if filter_by == 'blob':
            wanted = Blob
        elif filter_by == 'tree':
            wanted = Tree
        else:
            wanted = None
        found = self.raw_entries(path, pattern, depth, branch, commit_sha)
        if wanted is None:
            return [item[0] for item in found if item]
        return [item[0] for item in found if isinstance(item[1], wanted)]

    def entries(self, path=ROOT_PATH, pattern=None, depth=None, branch='master', commit_sha=None):
        """
        Generate ``(key, value)`` pairs for every Blob produced by
        ``raw_entries``; value is the blob data run through the serializer's
        ``loads``.  Tree nodes are skipped.
        """
        nodes = self.raw_entries(path, pattern, depth, branch, commit_sha)
        for key, obj in nodes:
            if not isinstance(obj, Blob):
                continue
            yield (key, self.serializer.loads(str(obj.data)))

    def raw_entries(self, path=ROOT_PATH, pattern=None, depth=None, branch='master', commit_sha=None):
        """
        Returns a generator that traverses the tree and produces entries of the
        form (tree_path, git_object), where tree_path is a string key into the
        store and git_object is the object stored there.

        :param path: String key to begin producing result entries from.
        :param pattern: Regex pattern to filter matching tree paths; MATCH_ALL
        is used when none is given
        :param depth: Specifies how deep to recurse when producing results.
        Default is None which does full tree traversal.
        :param branch: Git branch name to return key paths for.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A generator that produces entries of the form (tree_path, git_object)
        :raises ValueError: if path does not resolve to a Tree
        """
        start = self._get_object(path, branch, commit_sha)
        if not isinstance(start, Tree):
            raise ValueError("Path %s is not a tree!" % path)
        return self._entries(path, start, pattern or MATCH_ALL, depth)

    def _entries(self, path, tree, pattern, depth=None):
        """
        Recursively generate ``(key, object)`` pairs for the entries of tree.

        :param path: Key of tree, used as prefix for the entry keys
        :param tree: Tree whose entries are walked
        :param pattern: Object with a ``match`` method; only matching keys are
        yielded, but subtrees are descended into regardless
        :param depth: Remaining number of levels; a falsy value means no limit
        """
        for tree_entry in tree.iteritems():
            obj = self.repo[tree_entry.sha]
            key = self._tree_entry_key(path, tree_entry)
            if pattern.match(key):
                yield (key, obj)
            if not isinstance(obj, Tree):
                continue
            if not depth:
                child_depth = depth
            elif depth > 1:
                child_depth = depth - 1
            else:
                # depth budget used up: do not descend further
                continue
            for te in self._entries(key, obj, pattern, child_depth):
                yield te

    def trees(self, path=ROOT_PATH, pattern=None, depth=None, object_depth=None, branch='master', commit_sha=None):
        """
        Returns a python dict representation of the store.  The resulting dict
        can be scoped to a particular subtree with the path param.  Every
        entry produced by ``entries`` is merged into the dict through
        ``expand_tree``.

        :param path: Optional string key to begin building the dict from.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: Specifies how deep to recurse when producing results.
        Default is None which does full tree traversal.
        :param object_depth: Passed through to ``expand_tree``
        :param branch: Optional git branch name to return key paths from.
        :param commit_sha: Commit to read from instead of the branch head
        :return: A dict representing a section of the store.
        """
        result = {}
        flat = self.entries(path, pattern, depth, branch, commit_sha)
        for entry_path, value in flat:
            expand_tree(entry_path, value, result, object_depth)
        return result

    def _tree_entry_key(self, path, tree_entry):
        if path:
            return "%s/%s" % (path, tree_entry.path)
        else:
            return tree_entry.path

    def _branch_ref_name(self, name):
        if name.startswith('refs/heads/'):
            return name
        else:
            return "refs/heads/%s" % name

    def branch_head(self, name):
        return self.repo.refs[self._branch_ref_name(name)]

    def _add_tree(self, root_tree, blobs):
        """Write blobs into root_tree and the subtrees below it.

        Already existing subtrees are looked up through ``_get_object`` with
        its default branch.

        :param root_tree: Root tree to add trees to (modified in place)
        :param blobs: Iterable over blob path, sha, mode entries
        :return: SHA1 of the created tree.
        """
        # directory path -> {name: nested directory dict | (mode, sha)}
        pending = {"": {}}

        def ensure_dir(dir_path):
            # return the dict for dir_path, creating missing ancestors
            existing = pending.get(dir_path)
            if existing is not None:
                return existing
            parent_path, name = pathsplit(dir_path)
            parent = ensure_dir(parent_path)
            assert isinstance(name, basestring)
            created = {}
            parent[name] = created
            pending[dir_path] = created
            return created

        for blob_path, blob_sha, blob_mode in blobs:
            dir_path, name = pathsplit(blob_path)
            ensure_dir(dir_path)[name] = (blob_mode, blob_sha)

        def write_dir(dir_path):
            # build the Tree for dir_path, store it and return its id
            if not dir_path:
                tree = root_tree
            else:
                tree = self._get_object(dir_path)
                if not tree:
                    tree = Tree()
                elif not isinstance(tree, Tree):
                    # a non-tree object sits where a directory is needed
                    self.delete(dir_path)
                    tree = Tree()
            for name, entry in pending[dir_path].iteritems():
                if isinstance(entry, dict):
                    mode = stat.S_IFDIR
                    sha = write_dir(pathjoin(dir_path, name))
                else:
                    mode, sha = entry
                tree.add(name, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return write_dir("")
예제 #31
0
class GitStorage():
    """Wrapper around a dulwich ``Repo`` that commits files and rebuilds
    older versions of them.

    NOTE(review): ``repo.commit``, ``repo.tree``, ``repo._commit``,
    ``_get_parents`` and ``get_parents`` look like an old dulwich API and are
    left exactly as they were; confirm them against the installed version.
    """

    def _ignoreFile(self, dirName, fileName):
        """
        used for the copytree stuff (passed as ``ignore`` callback)
        ``dirName``
            the working directory
        ``fileName``
            list of files inside the directory (dirName)

        Returns ``dirName + name`` for every name that is not listed in the
        module-level ``fileToIgnore``.
        """
        # NOTE(review): shutil.copytree expects the *names to skip* (a subset
        # of ``fileName``). The concatenated paths returned here will normally
        # not equal a bare name, so nothing is skipped. Left unchanged because
        # the fix depends on how ``fileToIgnore`` is managed.
        result = []
        for i in fileName:
            path = dirName + i
            if path not in fileToIgnore:
                result.append(path)
        return result

    def _commit(self, tree):
        """
        commit a tree used only by the init
        ``tree``
            tree to commit

        Returns the id of the new commit; HEAD is set to it.
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (
            pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('commiting')
        return commit.id

    def __init__(self, path):
        """
        open the repository at ``path`` or create it
        ``path``
            directory of the repository; when it does not exist it is
            created, initialised and given a first commit of an empty tree
        """
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    # only this function will be used on upload
    def change(self, files):
        """
        stage and commit files; can also be called for the first upload
        ``files``
            a list of files to change
        """
        if not files:
            log.debug("trying to change nothing will do... nothing")
            return
        log.debug("this will change %i files" % (len(files)))
        # Repo.stage takes a list of paths; a bare string is not safe on
        # every dulwich version.
        self.repo.stage([str(f) for f in files])
        log.debug("stages dones")
        self.repo.do_commit(
            "this is so awesome that nobody will never see it",
            committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """
        creating files from the diff between 2 trees, it will be used in the
        code browser to get older version (walking on history)
        ``tree``
            the tree that you want to compare to; by default the tree of the
            first parent of HEAD
        ``dest``
            the destination folder to build sources; it is appended to the
            repository path as-is (plain string concatenation)
        ``originalTree``
            the original Tree, by default the tree of HEAD
        """
        if tree is None:
            head = self.repo.commit(
                self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(
                self.repo.commit(self.repo.head()).tree)
        blobToBuild = []
        # getting blob that have changed
        # NOTE(review): this tests a tree *entry* for membership in a Tree;
        # verify that it really detects changed blobs.
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)
        # Use the repository's own path instead of parsing it out of repr().
        repoLocation = self.repo.path
        destination = repoLocation + dest
        # creating the folder with link to older files
        if os.path.exists(destination):
            log.warning("%s already exist, copy will not work" % destination)
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation,
                            destination,
                            symlinks=True,
                            ignore=self._ignoreFile)
        for b in blobToBuild:
            # os.path.split returns a tuple, which has no pop(); dirname is
            # the directory part that has to exist before writing the file.
            fileDirectory = os.path.join(destination, os.path.dirname(b.path))
            if not os.path.exists(fileDirectory):
                os.makedirs(fileDirectory)
            with open(os.path.join(destination, b.path), 'w') as out:
                out.write(self.repo.get_object(b.sha).as_raw_string())

    # get*
    def getLastTree(self):
        """
        return the last tree
        """
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """
        return the ``tree`` attribute of every parent of HEAD
        """
        result = []
        commit = self.repo._commit(self.repo.head())
        for c in commit._get_parents():
            result.append(c.tree)
        return result

    def getOlderFileContent(self, file):
        """
        return the first file's content that changed from the file
        ``file``
            the file to work on

        Returns "" when no older content is found.
        """
        with open(file) as f:
            originalBlob = Blob.from_string(f.read())
        # parsing trees in order to find the tree where the file changed
        tree = None
        for t in self.getAllTrees():
            if originalBlob not in t:
                tree = t
                break
        if tree is None:
            # tree must exist, otherwise the file is not correct
            log.error(
                "there is no tree that contain this blob this souldn't happen, other way this file does not appear to come from this package"
            )
            return ""
        # Default to "nothing older": keeps the name bound when no parent
        # matches below.
        olderTree = tree
        if self.repo._commit(self.repo.head()).tree == tree:
            olderTree = self.repo.commit(
                self.repo.head())._get_parents()[0].tree
        else:
            for c in self.repo._commit(self.repo.head())._get_parents():
                if c.tree == tree:
                    try:
                        olderTree = c.get_parents()[0]
                    except IndexError:
                        log.debug("file is the last version")
                        olderTree = tree
        if olderTree != tree:
            # we must check here the blob that contains the older file
            for b in self.repo.object_store.iter_tree_contents(
                    olderTree.id):
                if originalBlob.path == b.path:
                    # older blob found; the first loop already tested
                    # whether they are the same, so return its content
                    return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """
        return the parents of the HEAD commit
        """
        return self.repo.commit(self.repo.head())._get_parents()
예제 #32
0
class GitUiOpts:
    """Small helper around a dulwich repository for an interactive git UI.

    Keeps the lists of unstaged and staged paths and offers stage, commit and
    push operations. ``committer`` has to be set by the caller; while it is
    ``None`` commits are made without an explicit committer.
    """

    def __init__(self, path, remote_target="origin"):
        """Open the repository at ``path`` and read the URL of ``remote_target``."""
        self.committer = None
        self.repo_path = path
        self.repo = Repo(self.repo_path)
        self.unstaged = []
        self.staged = []
        self.remote_url = self.repo.get_config().get(('remote', remote_target),
                                                     'url').decode()
        self.remote_url_credentials = None

    def _committer_bytes(self):
        """Return ``self.committer`` encoded to bytes, or ``None`` when unset."""
        if self.committer is None:
            return None
        return self.committer.encode()

    def get_unstaged(self):
        """Append every untracked and unstaged path to ``self.unstaged``.

        Byte paths are decoded; a path that cannot be decoded is kept as raw
        bytes. The list is not cleared first, so repeated calls accumulate.
        """
        status = porcelain.status(self.repo.path)
        for entry in list(status.untracked) + list(status.unstaged):
            if isinstance(entry, bytes):
                try:
                    entry = entry.decode()
                except UnicodeDecodeError:
                    pass  # best effort: keep the raw bytes
            self.unstaged.append(entry)

    def get_staged(self):
        """Append a ``{"type", "path"}`` dict for every staged path to ``self.staged``."""
        staged = porcelain.status(self.repo.path).staged
        for type_file in ['delete', 'add', 'modify']:
            for filepath in staged[type_file]:
                self.staged.append({
                    "type": type_file,
                    "path": filepath.decode()
                })

    def stage_file(self, filepath):
        """Stage ``filepath`` if it is listed in ``self.unstaged``."""
        if filepath in self.unstaged:
            self.repo.stage([filepath])

    def commit_all_files(self, commit_title):
        """Create one commit with ``commit_title`` as message and print the title."""
        self.repo.do_commit(commit_title.encode(),
                            committer=self._committer_bytes())
        print(commit_title)

    def commit_file(self):
        """Refresh ``self.staged`` and create one commit per listed entry.

        The message is "<type> <file name>" built from each entry.
        """
        self.get_staged()
        for file_to_commit in self.staged:
            commit_title = '{} {}'.format(
                file_to_commit['type'], file_to_commit['path'].split('/')[-1])
            self.repo.do_commit(commit_title.encode(),
                                committer=self._committer_bytes())
            print(commit_title)

    def push_once(self):
        """Push ``master`` to the remote and clear ``self.staged``.

        The URL with credentials is used when one has been set.
        """
        if self.remote_url_credentials is None:
            remote_url = self.remote_url
        else:
            remote_url = self.remote_url_credentials
        porcelain.push(self.repo,
                       remote_location=remote_url,
                       refspecs="master")
        self.staged = []

    def push(self):
        """Push; on failure ask for credentials and retry once.

        An error raised by the retry is propagated to the caller.
        """
        try:
            self.push_once()
        except Exception:
            username = self.simple_input(content="Username : ")
            password = self.simple_input(content="Password : ")
            # Insert "user:password@" right after the scheme separator.
            self.remote_url_credentials = "//{0}:{1}@".format(
                username, password).join(self.remote_url.split('//'))
            self.push_once()

    def get_committer(self):
        """Return ``(user.name, user.email)`` read from ``git config --list``.

        A value that is not configured is returned as an empty string.
        """
        username, mail = "", ""
        result = subprocess.run(["git", "config", "--list"],
                                stdout=subprocess.PIPE)
        for row in result.stdout.decode().split("\n"):
            # Split on the first "=" only: values may contain "=" themselves.
            row_key, separator, row_value = row.partition("=")
            if not separator:
                continue
            if row_key == "user.name":
                username = row_value
            elif row_key == "user.email":
                mail = row_value

        return username, mail

    def simple_input(self, content):
        """Print ``content`` and return one line read from standard input."""
        print(content)
        return input()

    def select_input(self, keyword, message, choices):
        """Ask a single list question with ``inquirer`` and return the answer."""
        questions = [
            inquirer.List(
                keyword,
                message=message,
                choices=choices,
            ),
        ]
        answers = inquirer.prompt(questions)
        return answers[keyword]
예제 #33
0
파일: gittle.py 프로젝트: rubik/gittle
class Gittle(object):
    """All paths used in Gittle external methods must be paths relative to the git repository
    """

    # Defaults used when a caller leaves the corresponding argument out
    DEFAULT_COMMIT = "HEAD"
    DEFAULT_BRANCH = "master"
    DEFAULT_REMOTE = "origin"
    DEFAULT_MESSAGE = "**No Message**"
    DEFAULT_USER_INFO = {"name": None, "email": None}

    # Diff implementations selectable by name; "dict" and "changes" map to
    # the same function
    DIFF_FUNCTIONS = {
        "classic": utils.git.classic_tree_diff,
        "dict": utils.git.dict_tree_diff,
        "changes": utils.git.dict_tree_diff,
    }
    DEFAULT_DIFF_TYPE = "dict"

    # Regular expressions for paths that are always hidden
    HIDDEN_REGEXES = [
        # Hide git directory
        r".*\/\.git\/.*"
    ]

    # References
    REFS_BRANCHES = "refs/heads/"
    REFS_REMOTES = "refs/remotes/"
    REFS_TAGS = "refs/tags/"

    # Name pattern truths
    # Each tuple is (old name present, new name present).
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    # NOTE: ``040000`` is a Python 2 octal literal
    MODE_DIRECTORY = 040000  # Used to tell if a tree entry is a directory

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap a dulwich repository.

        ``repo_or_path`` may be a dulwich repository, another ``Gittle``
        (its path is reopened) or a path string (made absolute first). Any
        other type raises ``Exception``. When ``auth`` is falsy an
        authenticator is built from ``*args`` / ``**kwargs`` via ``self.auth``.
        """
        if isinstance(repo_or_path, DulwichRepo):
            repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            repo = DulwichRepo(os.path.abspath(repo_or_path))
        else:
            logging.warning("Repo is of type %s" % type(repo_or_path))
            raise Exception("Gittle must be initialized with either a dulwich repository or a string to the path")

        self.repo = repo
        self.path = repo.path                      # repository location
        self.origin_uri = origin_uri               # remote url
        self._report_activity = report_activity    # client activity callback

        # Ignore filter: the built-in hidden patterns plus whatever
        # _get_ignore_regexes returns
        regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes = regexes
        regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Authenticator
        if not auth:
            self.auth(*args, **kwargs)
        else:
            self.authenticator = auth

    def report_activity(self, *args, **kwargs):
        """Forward the call to the activity callback, if one was given.

        Returns the callback's result, or ``None`` without a callback.
        """
        callback = self._report_activity
        if callback:
            return callback(*args, **kwargs)
        return

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        name = userinfo.get("name")
        email = userinfo.get("email")
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return "".join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """SHA of the current HEAD, as returned by ``repo.head()``."""
        current = self.repo.head()
        return current

    @property
    def is_bare(self):
        """The repository's ``bare`` flag (bare repositories have no working
        directory or index).
        """
        repo = self.repo
        return repo.bare

    @property
    def is_working(self):
        """Negation of ``is_bare``."""
        bare = self.is_bare
        return not bare

    def has_index(self):
        """Return whatever ``repo.has_index()`` reports."""
        repo = self.repo
        return repo.has_index()

    @property
    def has_commits(self):
        """``False`` when looking up HEAD raises ``KeyError``, else ``True``.

        A repository without HEAD is considered to have no commits.
        """
        try:
            self.repo.head()
        except KeyError:
            found = False
        else:
            found = True
        return found

    def ref_walker(self, ref=None):
        """Return ``repo.revision_history`` for ``ref`` (default ``"HEAD"``)."""
        target = ref if ref else "HEAD"
        start_sha = self._commit_sha(target)
        return self.repo.revision_history(start_sha)

    def branch_walker(self, branch):
        """Walk the history of ``branch``; a falsy value selects ``DEFAULT_BRANCH``."""
        name = branch or self.DEFAULT_BRANCH
        return self.ref_walker(self._format_ref_branch(name))

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list with the ``utils.git.commit_info`` result of every
        commit yielded by ``branch_walker(branch)``.

        ``start`` / ``end`` slice that list. A falsy ``end`` (``None`` or 0)
        means "no upper bound"; ``start`` is honoured in that case too
        (it used to be ignored). Returns ``[]`` for a repository without
        commits.
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        return commits[start:(end or None)]

    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        """Pluck ``"author"`` from the first ``n`` (default 10) commit infos
        of ``branch``.
        """
        limit = n or 10
        infos = self.commit_info(end=limit, branch=branch)
        return funky.pluck(infos, "author")

    @property
    def commit_count(self):
        """Length of ``ref_walker()``, or 0 when that raises ``KeyError``."""
        try:
            count = len(self.ref_walker())
        except KeyError:
            count = 0
        return count

    def commits(self):
        """Return the ``"sha"`` value of every entry of ``commit_info()``."""
        shas = []
        for info in self.commit_info():
            shas.append(info["sha"])
        return shas

    @property
    def git_dir(self):
        """The value of ``repo.controldir()``."""
        repo = self.repo
        return repo.controldir()

    def auth(self, *args, **kwargs):
        """Build a ``GittleAuth`` from the arguments, store it as
        ``self.authenticator`` and return it.
        """
        authenticator = GittleAuth(*args, **kwargs)
        self.authenticator = authenticator
        return authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.DEFAULT_BRANCH
        refs_key = self._format_ref_branch(branch_name)
        sha = self.branches[branch_name]

        def wants_func(old):
            refs_key = self._format_ref_branch(branch_name)
            return {refs_key: sha}

        return wants_func

    def _get_ignore_regexes(self):
        gitignore_filename = os.path.join(self.path, ".gitignore")
        if not os.path.exists(gitignore_filename):
            return []
        lines = open(gitignore_filename).readlines()
        globers = map(lambda line: line.rstrip(), lines)
        return utils.paths.globers_to_regex(globers)

    def abspath(self, repo_file):
        """Get the absolute path for a file in the git repo."""
        joined = os.path.join(self.path, repo_file)
        return os.path.abspath(joined)

    def relpath(self, abspath):
        """Get the path of ``abspath`` relative to the repository path."""
        root = self.path
        return os.path.relpath(abspath, root)

    @property
    def last_commit(self):
        """The object stored under the HEAD sha (``self[repo.head()]``)."""
        head_sha = self.repo.head()
        return self[head_sha]

    @property
    def index(self):
        """Result of ``repo.open_index()``; the call is made on every access."""
        repo = self.repo
        return repo.open_index()

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository at ``path`` and wrap it.

        ``mkdir_safe(path)`` is called first. A truthy ``bare`` selects
        ``DulwichRepo.init_bare``, otherwise ``DulwichRepo.init`` is used.
        Remaining arguments go to the class constructor.
        """
        mkdir_safe(path)

        # Pick the dulwich constructor, create the repo, then wrap it
        constructor = DulwichRepo.init_bare if bare else DulwichRepo.init
        dulwich_repo = constructor(path)
        return cls(dulwich_repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        """Same as ``init`` with ``bare`` defaulting to ``True``."""
        if "bare" not in kwargs:
            kwargs["bare"] = True
        return cls.init(*args, **kwargs)

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for the remote.

        ``origin_uri`` falls back to ``self.origin_uri``; when both are empty
        ``InvalidRemoteUrl`` is raised. The client options are the
        authenticator's kwargs, overridden by ``kwargs``, plus
        ``report_activity``.
        """
        uri = origin_uri or self.origin_uri
        if not uri:
            # Fail if there is no remote URL at all
            raise InvalidRemoteUrl()

        options = {}
        options.update(self.authenticator.kwargs())
        options.update(kwargs)
        options.update({"report_activity": self.report_activity})

        client, remote_path = get_transport_and_path(uri, **options)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None, progress_stderr=None):
        """Send the pack for ``branch_name`` to ``origin_uri``.

        Returns the result of the client's ``send_pack``.
        """
        wants = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri, progress_stderr=progress_stderr)
        pack_generator = self.repo.object_store.generate_pack_contents
        return client.send_pack(remote_path, wants, pack_generator, progress=progress)

    def push(self, origin_uri=None, branch_name=None, progress=None, progress_stderr=None):
        """Like ``git push``: delegate to ``push_to`` with the same arguments."""
        result = self.push_to(origin_uri, branch_name, progress, progress_stderr)
        return result

    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Not recommended at all: delete the repository directory, re-create
        and re-initialise it, then pull from ``origin_uri``.
        """
        repo_path = self.path

        # Remove all previously existing data
        rmtree(repo_path)
        mkdir_safe(repo_path)
        self.repo = DulwichRepo.init(repo_path)

        # Fetch a brand new copy from the remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        """Fetch from ``origin_uri``; ``branch_name`` is accepted but not used."""
        result = self.fetch(origin_uri)
        return result

    def pull(self, origin_uri=None, branch_name=None):
        """Like ``git pull``: delegate to ``pull_from``."""
        result = self.pull_from(origin_uri, branch_name)
        return result

    def fetch_remote(self, origin_uri=None):
        """Fetch from the remote into ``self.repo`` and return the remote refs."""
        client, remote_path = self.get_client(origin_uri=origin_uri)
        return client.fetch(remote_path, self.repo)

    def _setup_fetched_refs(self, refs, origin, bare):
        """Import the fetched branches and tags and point HEAD at ``refs["HEAD"]``.

        Branches go under ``refs/remotes/<origin>``, or under ``refs/heads``
        when ``bare`` is truthy; tags go under ``refs/tags``.
        """
        # Extract and filter the remote tags and heads
        clean_remote_tags = utils.git.clean_refs(utils.git.subrefs(refs, "refs/tags"))
        clean_remote_heads = utils.git.clean_refs(utils.git.subrefs(refs, "refs/heads"))

        # Base of the imported branch refs
        heads_base = "refs/heads" if bare else "refs/remotes/" + origin

        self.import_refs(heads_base, clean_remote_heads)
        self.import_refs("refs/tags", clean_remote_tags)

        # Update HEAD
        self["HEAD"] = refs["HEAD"]

    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote, update the refs, then refresh the checkout.

        Non-bare: ``checkout_all()`` is called. Bare: ``update_server_info()``
        is called instead. ``origin`` defaults to ``DEFAULT_REMOTE``.
        """
        bare = bare if bare else False
        remote_name = origin if origin else self.DEFAULT_REMOTE

        # Remote refs, then local refs (branches, tags, HEAD)
        remote_refs = self.fetch_remote(origin_uri)
        self._setup_fetched_refs(remote_refs, remote_name, bare)

        if bare:
            self.update_server_info()
        else:
            # Checkout working directories
            self.checkout_all()

    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository into ``local_path`` and return the wrapper.

        ``mkdir`` is accepted but not used by this method.
        """
        mkdir_safe(local_path)

        # Initialize the local repository
        initializer = cls.init_bare if bare else cls.init
        local_repo = initializer(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)
        repo.fetch(bare=bare)

        # TODO: add origin

        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as ``clone`` except that ``bare`` defaults to ``True``."""
        if "bare" not in kwargs:
            kwargs["bare"] = True
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):

        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.warning("STAGING : %s" % modified_files)
            self.add(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message, author=author_msg, committer=committer_msg, encoding="UTF-8", tree=tree, *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat tree built from ``structure`` and return its id.

        ``structure`` is an iterable of mappings with ``"data"``, ``"name"``
        and ``"mode"`` keys. Each entry becomes a blob in the object store
        and an entry of the tree. Entries that cannot be read or
        ASCII-encoded are skipped.
        """
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:

            # str only
            try:
                data = file_info["data"].encode("ascii")
                name = file_info["name"].encode("ascii")
                mode = file_info["mode"]
            except Exception:
                # Skip the file on encoding errors or malformed entries.
                # ``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt / SystemExit still propagate.
                continue

            blob = Blob()

            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        """Like ``git commit -a``: commit with the same identity as author
        and committer.
        """
        identity = {"name": name, "email": email}
        return self._commit(committer=identity, author=identity, message=message, files=files, *args, **kwargs)

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Commit a tree built from ``structure`` without staging files.

        Main use is committing directly to bare repositories, e.g. a first
        commit so the repo can be cloned right away. Returns ``None`` when
        ``structure`` is empty.
        """
        if structure:
            tree = self._tree_from_structure(structure)
            identity = {"name": name, "email": email}
            return self._commit(committer=identity, author=identity, message=message, tree=tree, *args, **kwargs)
        return

    def sync(self, origin_uri=None):
        """Push all local commits, then pull all remote commits.

        Returns the result of the pull.
        """
        self.push(origin_uri)
        pulled = self.pull(origin_uri)
        return pulled

    def lookup_entry(self, relpath, trackable_files=set()):
        """Return ``(blob sha1 hexdigest, st_mode)`` for a working-tree file.

        The digest covers the header ``"blob <size>\\0"`` followed by the
        file's bytes. Raises ``KeyError`` when ``relpath`` is not in
        ``trackable_files``.
        """
        if relpath not in trackable_files:
            raise KeyError

        full_path = self.abspath(relpath)

        with open(full_path, "rb") as handle:
            content = handle.read()
            digest = sha1()
            digest.update("blob %u\0" % len(content))
            digest.update(content)
        return (digest.hexdigest(), os.stat(full_path).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        """The entries obtained by iterating the index."""
        current_index = self.index
        return list(current_index)

    @property
    @funky.transform(set)
    def raw_files(self):
        """Result of ``utils.paths.subpaths`` for the repository path, unfiltered."""
        root = self.path
        return utils.paths.subpaths(root)

    @property
    @funky.transform(set)
    def ignored_files(self):
        """Result of ``utils.paths.subpaths`` for the repository path with
        ``self.filters`` applied.
        """
        active_filters = self.filters
        return utils.paths.subpaths(self.path, filters=active_filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        """``raw_files`` minus ``ignored_files``."""
        everything = self.raw_files
        return everything - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        """``trackable_files`` minus ``tracked_files``."""
        candidates = self.trackable_files
        return candidates - self.tracked_files

    """
    @property
    @funky.transform(set)
    def modified_staged_files(self):
        "Checks if the file has changed since last commit"
        timestamp = self.last_commit.commit_time
        index = self.index
        return [
            f
            for f in self.tracked_files
            if index[f][1][0] > timestamp
        ]
    """

    def _changed_entries(self, ref=None):
        """Return a list of tuples representing the changed elements in the
        git tree of ``ref`` (default ``DEFAULT_COMMIT``).

        Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        Returns ``[]`` when the repository has no commits.
        """
        target = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        object_store = self.repo.object_store
        tree_id = self[target].tree
        candidates = self.trackable_files

        lookup = partial(self.lookup_entry, trackable_files=candidates)

        return list(changes_from_tree(candidates, lookup, object_store, tree_id, want_unchanged=False))

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        """Return the names of changed entries whose (old, new) name presence
        equals ``pattern``.

        For each match the value of ``funky.first_true(names)`` is kept,
        provided it is truthy.
        """
        matches = []
        for names, modes, sha in self._changed_entries():
            if tuple(map(bool, names)) != pattern:
                continue
            name = funky.first_true(names)
            if name:
                matches.append(name)
        return matches

    @property
    @funky.transform(set)
    def removed_files(self):
        """Entries matching ``PATTERN_REMOVED``, minus ``ignored_files``."""
        removed = self._changed_entries_by_pattern(self.PATTERN_REMOVED)
        return removed - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        """Entries matching ``PATTERN_ADDED``, minus ``ignored_files``."""
        added = self._changed_entries_by_pattern(self.PATTERN_ADDED)
        return added - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        """Entries matching ``PATTERN_MODIFIED``, minus ``ignored_files``."""
        modified = self._changed_entries_by_pattern(self.PATTERN_MODIFIED)
        return modified - self.ignored_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        """Tracked files whose mtime is later than the last commit's
        ``commit_time``.
        """
        timestamp = self.last_commit.commit_time
        newer = []
        for tracked in self.tracked_files:
            if os.stat(self.abspath(tracked)).st_mtime > timestamp:
                newer.append(tracked)
        return newer

    @property
    def pending_files(self):
        """Union of ``modified_files``, ``added_files`` and ``removed_files``:
        everything that could possibly be staged.
        """
        pending = self.modified_files | self.added_files
        return pending | self.removed_files

    @property
    def pending_files_by_state(self):
        """Map every pending path to ``"modified"``, ``"added"`` or ``"removed"``."""
        by_state = {"modified": self.modified_files, "added": self.added_files, "removed": self.removed_files}

        # "Flip" the dictionary: path -> state
        flipped = {}
        for state, paths in by_state.items():
            for path in paths:
                flipped[path] = state
        return flipped

    """
    @property
    @funky.transform(set)
    def modified_files(self):
        return self.modified_staged_files | self.modified_unstaged_files
    """

    @funky.arglist_method
    def stage(self, files):
        """Like ``git add``: hand ``files`` to ``repo.stage`` and return its result."""
        repo = self.repo
        return repo.stage(files)

    def add(self, *args, **kwargs):
        """Alias of ``stage``."""
        staged = self.stage(*args, **kwargs)
        return staged

    @funky.arglist_method
    def rm(self, files, force=False):
        """Like ``git rm``: drop ``files`` from the index and write it.

        Paths that are not in the index are ignored. ``force`` is accepted
        but not used. Returns the result of ``index.write()``.
        """
        # ``self.index`` opens a fresh index on every access, so the
        # deletions must happen on the same object that is written
        # afterwards; they used to be applied to a throw-away copy.
        index = self.index
        for path in [f for f in files if f in index]:
            del index[path]
        return index.write()

    def mv_fs(self, file_pair):
        """Rename on the filesystem; ``file_pair`` is ``(old_name, new_name)``."""
        source, target = file_pair
        os.rename(source, target)

    @funky.arglist_method
    def mv(self, files_pair):
        """Like ``git mv``: rename each ``(old, new)`` pair whose old name is
        in the index, then stage the new names, ``rm`` the old names and
        stage the old names again.
        """
        index = self.index
        tracked_pairs = [pair for pair in files_pair if pair[0] in index]
        for pair in tracked_pairs:
            self.mv_fs(pair)
        old_files = [funky.first(pair) for pair in tracked_pairs]
        new_files = [funky.last(pair) for pair in tracked_pairs]
        self.add(new_files)
        self.rm(old_files)
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        """Call ``build_index_from_tree`` for ``tree`` with the repository's
        path, index path and object store, and return its result.
        """
        repo = self.repo
        return build_index_from_tree(repo.path, repo.index_path(), repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        """Check out the tree of ``commit_sha`` (default: current HEAD)."""
        sha = commit_sha or self.head
        # Rebuild the index from that commit's tree
        return self._checkout_tree(self._commit_tree(sha))

    def checkout(self, commit_sha=None, files=None):
        """Checkout only a select amount of files.

        Currently a stub: the defaults are resolved and ``self`` is returned.
        """
        if not commit_sha:
            commit_sha = self.head
        if not files:
            files = []

        return self

    @funky.arglist_method
    def reset(self, files, commit="HEAD"):
        """Not implemented: does nothing and returns ``None``."""

    def rm_all(self):
        """Remove every entry from the index and write it.

        Returns the result of ``index.write()``.
        """
        # ``self.index`` opens a fresh index on every access: clear and write
        # the same object, otherwise an untouched index is written back.
        index = self.index
        index.clear()
        return index.write()

    def _to_commit(self, commit_obj):
        """Return the repository object for a string key; anything else is
        returned unchanged. Lets methods accept SHAs or Commit objects.
        """
        if not isinstance(commit_obj, basestring):
            return commit_obj
        return self.repo[commit_obj]

    def _commit_sha(self, commit_obj):
        """Return the SHA for ``commit_obj``.

        A value accepted by ``utils.git.is_sha`` is returned as is. Another
        string is looked up in the repository and a non-string is used
        directly; in both cases the object's ``id`` is returned.
        """
        if utils.git.is_sha(commit_obj):
            return commit_obj
        if isinstance(commit_obj, basestring):
            # self[commit_obj] is avoided here to prevent infinite recursion
            resolved = self.repo[commit_obj]
        else:
            resolved = commit_obj
        return resolved.id

    def _blob_data(self, sha):
        """Return a blobs content for a given SHA
        """
        return self[sha].data

    def get_parent_commit(self, commit, n=None):
        """Recursively follow first parents ``n`` times (default 1) and
        return the SHA reached.

        Stops early at a commit without parents.
        Warning: remember that parents aren't the previous commits.
        """
        remaining = 1 if n is None else n
        current = self._to_commit(commit)
        parents = current.parents

        if parents and remaining > 0:
            # Recur on the first parent
            first_parent = self[parents[0]]
            return self.get_parent_commit(first_parent, remaining - 1)

        # Nothing left to walk: return a SHA
        return self._commit_sha(current)

    def get_previous_commit(self, commit_ref, n=None):
        """Return what ``funky.next`` yields for the entry ``n`` (default 1)
        positions after ``commit_ref`` in ``commits()``.

        The SHA of ``commit_ref`` itself is passed as the default.
        """
        sha = self._parse_reference(commit_ref)
        steps = n or 1
        history = self.commits()
        return funky.next(history, sha, n=steps, default=sha)

    def _parse_reference(self, ref_string):
        # COMMIT_REF~x
        if "~" in ref_string:
            ref, count = ref_string.split("~")
            count = int(count)
            commit_sha = self._commit_sha(ref)
            return self.get_previous_commit(commit_sha, count)
        return self._commit_sha(ref_string)

    def _commit_tree(self, commit_sha):
        """Return the tree object for a given commit
        """
        return self[commit_sha].tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff ``commit_sha`` against ``compare_to``.

        ``compare_to`` defaults to ``get_previous_commit(commit_sha)`` and
        ``diff_type`` to ``DEFAULT_DIFF_TYPE``; it selects an entry of
        ``DIFF_FUNCTIONS``. ``filter_binary`` is accepted but not used.
        """
        kind = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[kind]

        base = compare_to if compare_to else self.get_previous_commit(commit_sha)

        return self._diff_between(base, commit_sha, diff_function=diff_func)

    def diff_working(self, ref=None, filter_binary=True):
        """Diff between the current working directory and ``ref``
        (``_changed_entries`` falls back to its default when ``ref`` is empty).
        """
        changes = self._changed_entries(ref=ref)
        return utils.git.diff_changes_paths(
            self.repo.object_store, self.path, changes, filter_binary=filter_binary
        )

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Flatten the files reachable from a commit (or tree) into one dict.

        The result maps each file path to a description of that file:
            {
                "directory/filename.txt": {
                    "name": "filename.txt",
                    "path": "directory/filename.txt",
                    "mode": <entry mode>,
                    "sha": <blob sha>,
                    "data": <blob content>,
                },
                ...
            }

        commit_sha  -- a commit SHA, or a tree SHA when ``is_tree`` is true.
        parent_path -- prefix joined in front of every entry name.
        is_tree     -- treat ``commit_sha`` as a tree (used when recursing).
        paths       -- optional collection of paths; other files are skipped.
        """
        collected = {}
        is_tree = is_tree or False
        parent_path = parent_path or ""

        tree_sha = commit_sha if is_tree else self._commit_tree(commit_sha)
        tree = self[tree_sha]

        for mode, name, sha in tree.entries():
            full_path = os.path.join(parent_path, name)

            if mode == self.MODE_DIRECTORY:
                # Sub-directory: merge in everything found below it
                nested = self.get_commit_files(sha, parent_path=full_path, is_tree=True, paths=paths)
                collected.update(nested)
            elif paths is None or full_path in paths:
                # Plain file that the caller asked for
                collected[full_path] = {
                    "name": name,
                    "path": full_path,
                    "mode": mode,
                    "sha": sha,
                    "data": self._blob_data(sha),
                }
        return collected

    def file_versions(self, path):
        """Return the distinct versions of ``path`` across ``commit_info()``.

        Commits are visited in the order ``commit_info()`` yields them.  For
        every commit containing the file, its file-info dict (see
        ``get_commit_files``) is appended to the result unless a version with
        the same blob SHA was already recorded.  As a side effect the
        matching commit dict gets a ``"file"`` key holding that file info.

        Returns a list of file-info dicts.
        """
        versions = []
        seen_shas = set()

        for commit in self.commit_info():
            try:
                files = self.get_commit_files(commit["sha"], paths=[path])
                # dict views cannot be indexed on Python 3, so materialise
                # them first; an empty result still raises IndexError.
                file_data = list(files.values())[0]
            except IndexError:
                # File is not part of this commit
                continue

            file_sha = file_data["sha"]
            if file_sha in seen_shas:
                continue
            seen_shas.add(file_sha)

            # Add file info
            commit["file"] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits
            Please use .diff method unless you have very speciic needs
        """

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """List of changes between two SHAs.

        Calls ``diff`` with ``diff_type`` forced to ``"changes"``; every
        other argument is passed through.  Expected result shape:
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        options = dict(kwargs, diff_type="changes")
        return self.diff(*args, **options)

    def changes_count(self, *args, **kwargs):
        """Return how many entries ``changes`` yields for the same arguments."""
        change_list = self.changes(*args, **kwargs)
        return len(change_list)

    def _refs_by_pattern(self, pattern):
        refs = self.refs

        def item_filter(key_value):
            """Filter only concered refs"""
            key, value = key_value
            return key.startswith(pattern)

        def item_map(key_value):
            """Rewrite keys"""
            key, value = key_value
            new_key = key[len(pattern) :]
            return (new_key, value)

        return dict(map(item_map, filter(item_filter, refs.items())))

    @property
    def refs(self):
        """All references of the wrapped repository, as given by ``repo.get_refs()``."""
        all_refs = self.repo.get_refs()
        return all_refs

    def set_refs(self, refs_dict):
        """Write every ``name -> value`` pair of ``refs_dict`` into the repository.

        The method previously lacked its ``self`` parameter and therefore
        could not be called on an instance.
        """
        for ref_name, value in refs_dict.items():
            self.repo[ref_name] = value

    def import_refs(self, base, other):
        """Forward to ``import_refs`` on the wrapped repository's refs container."""
        ref_container = self.repo.refs
        return ref_container.import_refs(base, other)

    @property
    def branches(self):
        """Refs found under ``REFS_BRANCHES``, keyed by name without that prefix."""
        prefix = self.REFS_BRANCHES
        return self._refs_by_pattern(prefix)

    def _active_branch(self, refs=None, head=None):
        head = head or self.head
        refs = refs or self.branches
        try:
            return {branch: branch_head for branch, branch_head in refs.items() if branch_head == head}.items()[0]
        except IndexError:
            pass
        return (None, None)

    @property
    def active_branch(self):
        """Name part of the pair returned by ``_active_branch()``."""
        pair = self._active_branch()
        return pair[0]

    @property
    def active_sha(self):
        """SHA part of the pair returned by ``_active_branch()``."""
        pair = self._active_branch()
        return pair[1]

    @property
    def remote_branches(self):
        """Refs found under ``REFS_REMOTES``, keyed by name without that prefix."""
        prefix = self.REFS_REMOTES
        return self._refs_by_pattern(prefix)

    @property
    def tags(self):
        """Refs found under ``REFS_TAGS``, keyed by name without that prefix."""
        prefix = self.REFS_TAGS
        return self._refs_by_pattern(prefix)

    @property
    def remotes(self):
        """Dict of remotes read from the repository configuration.

        Every config section whose first key component is ``"remote"``
        contributes one entry, named after the second component and holding
        that section's ``"url"`` value:
        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        """
        found = {}
        for keys, values in self.repo.get_config().items():
            if keys[0] != "remote":
                continue
            found[keys[1]] = values["url"]
        return found

    def add_ref(self, new_ref, old_ref):
        """Point ``new_ref`` at whatever ``old_ref`` currently points to.

        ``update_server_info`` is called afterwards.
        """
        ref_container = self.repo.refs
        ref_container[new_ref] = ref_container[old_ref]
        self.update_server_info()

    def remove_ref(self, ref_name):
        """Delete ``ref_name`` from the repository refs.

        Returns True after deleting (and calling ``update_server_info``),
        or False when the ref does not exist.
        """
        ref_container = self.repo.refs
        if ref_name in ref_container:
            del ref_container[ref_name]
            self.update_server_info()
            return True
        return False

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Create ``new_branch`` from ``base_branch``.

        A local branch named ``base_branch`` is preferred as the starting
        point; otherwise the branch of the same name on the ``tracking``
        remote is used.

        base_branch -- name of the branch to fork.
        new_branch  -- name of the branch to create.
        tracking    -- remote to look the base branch up on; defaults to
                       ``DEFAULT_REMOTE``.

        Returns the full reference of the new branch.
        Raises Exception when ``new_branch`` already exists or when
        ``base_branch`` can be found neither locally nor on the remote.
        """
        # The remote to track. The argument used to be overwritten
        # unconditionally, so a caller-supplied remote was ignored.
        tracking = tracking or self.DEFAULT_REMOTE

        local_branches = self.branches

        # Already exists
        if new_branch in local_branches:
            raise Exception("branch %s already exists" % new_branch)

        # Ref names always use "/" regardless of the platform path separator
        remote_branch = "/".join([tracking, base_branch])

        # Fork Local
        if base_branch in local_branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception(
                "Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)
            )

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def remove_branch(self, branch_name):
        """Remove the branch ``branch_name``; returns what ``remove_ref`` returns."""
        branch_ref = self._format_ref_branch(branch_name)
        removed = self.remove_ref(branch_ref)
        return removed

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Change the current branch.

        branch_name -- branch to make HEAD point at.
        tracking    -- remote passed on to ``create_branch`` when the branch
                       has to be created.
        create      -- whether a missing branch may be created; defaults to
                       True.

        When the repository has a working tree (``is_working``), it is
        cleaned and the files of the new branch are checked out.

        Raises Exception when the branch does not exist and ``create`` is
        false.
        """
        if create is None:
            create = True

        # Check if branch exists
        if branch_name not in self.branches:
            # The flag used to be ignored: a missing branch was always created
            if not create:
                raise Exception("branch %s does not exist" % branch_name)
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref("HEAD", branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def clean(self, force=None, directories=None):
        """Delete every path listed in ``self.untracked_files``.

        ``force`` and ``directories`` are accepted but currently unused.

        Returns the list of untracked files that was processed.
        Raises OSError when a file cannot be removed.
        """
        untracked_files = self.untracked_files
        # Explicit loop: on Python 3 a bare ``map(os.remove, ...)`` is lazy
        # and would never delete anything.
        for file_path in untracked_files:
            os.remove(file_path)
        return untracked_files

    def clean_working(self):
        """Purge the working directory by delegating to ``clean()``.

        Used when switching branches; returns whatever ``clean()`` returns.
        """
        removed = self.clean()
        return removed

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        tree = self[tree_sha]
        structure = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return structure
        for mode, path, sha in tree.entries():
            # tree
            if mode == self.MODE_DIRECTORY:
                # Recur
                structure[path] = self._get_fs_structure(sha, depth=depth - 1, parent_sha=tree_sha)
            # commit
            else:
                structure[path] = sha
        structure["."] = tree_sha
        structure[".."] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        """Return the part of the tree structure located at ``path``.

        The structure is built one level deeper than the number of path
        components, then narrowed down with ``funky.subkey``.
        """
        components = path.split(os.path.sep)
        full_structure = self._get_fs_structure(tree_sha, depth=len(components) + 1)
        return funky.subkey(full_structure, components)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" of a commit, based on that commit's tree.

        An empty ``subpath`` or one contained in ``ROOT_PATHS`` lists the
        root (one level deep); anything else is looked up by path.
        """
        tree_sha = self._commit_tree(ref)

        wants_root = subpath in self.ROOT_PATHS or not subpath
        if not wants_root:
            # Any other path
            return self._get_fs_structure_by_path(tree_sha, subpath)
        # Root path
        return self._get_fs_structure(tree_sha, depth=1)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit.

        The info dict has the shape produced by ``get_commit_files``.
        Raises IndexError when the commit does not contain ``path``.
        """
        files = self.get_commit_files(ref, paths=[path])
        # Materialise the view first: ``dict.items()[0]`` only works on
        # Python 2.  An empty result still raises IndexError as before.
        return list(files.values())[0]

    def commit_tree(self, ref, *args, **kwargs):
        """Return the file-system structure of the tree behind ``ref``.

        Extra arguments are handed to ``_get_fs_structure`` unchanged.
        """
        root_tree_sha = self._commit_tree(ref)
        structure = self._get_fs_structure(root_tree_sha, *args, **kwargs)
        return structure

    def update_server_info(self):
        """Refresh the server info files, but only for bare repositories.

        Delegates to the module-level ``update_server_info`` function.
        """
        if self.is_bare:
            update_server_info(self.repo)

    def _is_fast_forward(self):
        pass

    def _merge_fast_forward(self):
        pass

    def __hash__(self):
        """This is required otherwise the memoize function will just mess it up
        """
        return hash(self.path)

    def __getitem__(self, key):
        sha = self._parse_reference(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        self.repo[key] = value

    # Class-level aliases: extra public names bound to methods defined
    # elsewhere in this class.
    # Alias to clone_bare
    fork = clone_bare
    # Alias to commit_info
    log = commit_info
    # Alias to changes_count
    diff_count = changes_count
    # Alias to recent_contributors.
    # NOTE(review): "comtributors" looks like a misspelling of
    # "contributors"; the name is left as-is because callers may use it.
    comtributors = recent_contributors
예제 #34
0
class GitHubPagesWriter:
    """Collect generated files in memory and commit them to a pages branch.

    Files are registered with ``write_file``; their blobs go straight into
    the repository's object store while the directory layout is kept in
    ``self.tree`` as nested dicts (name -> blob id or sub-dict).  ``commit``
    turns that layout into tree objects and commits it on
    ``refs/heads/<branch>``.  Used as a context manager, the commit happens
    on exit unless an exception occurred.
    """

    def __init__(self, *, repo='.', branch='gh-pages', remote='origin'):
        """Open the repository at ``repo`` and start with an empty layout."""
        self.repo = Repo(repo)
        self.branch = branch
        self.remote = remote
        self.tree = {}

    @property
    def base_url(self):
        """Public ``https://<user>.github.io[/<repo>]`` URL for the remote.

        The remote's configured URL must point at ``github.com`` (checked
        with an assertion) and consist of exactly ``<user>/<repo>``.
        """
        config = self.repo.get_config()
        section = (b'remote', self.remote.encode())
        configured = config.get(section, b'url')
        client, remote_path = get_transport_and_path(configured.decode())
        parsed = urlparse(client.get_url(remote_path))
        assert parsed.hostname == 'github.com'

        # Drop the leading "/" and an optional ".git" suffix
        slug = parsed.path[1:]
        if slug.endswith(".git"):
            slug = slug[:-4]

        user, repo = slug.split('/')
        if repo == f'{user}.github.io':
            return f'https://{user}.github.io'
        return f'https://{user}.github.io/{repo}'

    def write_file(self, url, content):
        """Store ``content`` as a blob and register it under ``url``.

        A ``url`` ending in ``/`` is mapped to ``index.html`` inside that
        directory.  The first path component (the root) is discarded.
        """
        parts = PurePath(url).parts[1:]
        if url.endswith("/"):
            parts = parts + ("index.html", )

        # Walk (and create on demand) the nested dicts for the directories
        node = self.tree
        for directory in parts[:-1]:
            node = node.setdefault(directory, {})

        blob = Blob.from_string(content)
        self.repo.object_store.add_object(blob)
        node[parts[-1]] = blob.id

    def write_tree(self, files):
        """Recursively turn a nested ``files`` dict into tree objects.

        Returns the id of the tree created for ``files``.
        """
        tree = Tree()
        for name, value in files.items():
            encoded_name = name.encode('utf-8')
            if isinstance(value, dict):
                # Sub-directory: write it first and reference its tree id
                mode, object_id = 0o040000, self.write_tree(value)
            else:
                mode, object_id = 0o100644, value
            tree.add(encoded_name, mode, object_id)
        self.repo.object_store.add_object(tree)
        return tree.id

    def commit(self):
        """Commit everything registered so far to the configured branch.

        An empty ``.nojekyll`` file is always added before committing.
        """
        self.write_file("/.nojekyll", b'')
        root_tree_id = self.write_tree(self.tree)
        branch_ref = ("refs/heads/" + self.branch).encode()
        commit_id = self.repo.do_commit(message=b'generate GitHub Pages',
                                        tree=root_tree_id,
                                        ref=branch_ref)
        self.repo[branch_ref] = commit_id

    def __enter__(self):
        """Return the writer itself."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Commit on a clean exit; do nothing when an exception is active."""
        if exc_type is not None:
            return
        self.commit()
예제 #35
0
파일: git.py 프로젝트: ndreynolds/hopper
class Repo(object):
    """
    An abstraction layer on top of dulwich.repo.Repo for higher-level
    git repository actions like:

    * adding only modified files
    * checking out whole trees (or paths within them) from refs
    * diffs with difflib
    * branching and tagging (both displaying and creating)
    * listing commits down from a ref

    Methods are structured to match git commands when appropriate.

    It also supports executing arbitrary git commands, if git is installed.
    Of course, everything else is implemented in pure python, so having git
    installed is optional.

    Should be considered a work-in-progress.
    """

    def __init__(self, path):
        """Open the repository at ``path``.

        :param path: location handed to ``DulwichRepo``; also kept as
                     ``self.root``.
        """
        # The inner Dulwich Repo object.
        inner = DulwichRepo(path)
        self.repo = inner
        self.root = path

    @classmethod
    def init(cls, path, mkdir=False, bare=False):
        """
        Create a normal or bare repository and wrap it. The actual work is
        handed off to Dulwich.

        :param path: the directory in which to create the repository.
        :param mkdir: forwarded to ``DulwichRepo.init`` for non-bare
                      repositories (create the directory first). Not used
                      when **bare** is True.
        :param bare: if True, create a bare repository at the path.

        :return: a ``Repo`` instance.
        """
        if not bare:
            DulwichRepo.init(path, mkdir)
        else:
            DulwichRepo.init_bare(path)
        return cls(path)

    def add(self, path=None, all=False, add_new_files=True):
        """
        Add files to the repository or staging area if new or modified. 
        Equivalent to the ``git add`` command. 

        :param path: the path to the file to add, relative to the
            repository root. 
        :param all: if True, add all files under the given path. If 
            **path** is omitted, the repository's root path will be used.
        :param add_new_files: if True, this command will also add new
            files. Note this is the default behavior. The option is 
            provided for situations (e.g. ``git commit -a``) where adding
            new files would be undesirable.

        :return: list of filepaths that were added.
                   
        If **path** is a file and **all** is True, only the single 
        file will be added.
        If **path** is a directory and **all** is False, nothing 
        will be added.
        Likewise, if both **path** and **all** are omitted, nothing 
        will be added.        

        Additionally, the ``add`` method checks to see if the path(s)
        have been modified. We don't want to create new blobs if we 
        don't need them.
        """

        # the implementation creates a list of paths and stages them using 
        # dulwich.Repo.stage

        # Paths are a little tricky. To work with repositories independent
        # of the current working directory, we need absolute paths to files.
        # At the same time, git trees are relative to the repository root.
        # So, we have to do a few conversions.

        adds = []

        # get an absolute path for doing isfile/isdir checks.
        if path is not None:
            path = os.path.join(self.root, path)

        # add all files within given path
        if path is not None and all:
            if os.path.isdir(path):
                # walk the directory
                for directory, dirnames, filenames in os.walk(directory):
                    if '.git' in dirnames:
                        # in case path is root, don't traverse the .git subdir 
                        dirnames.remove('.git')
                    for f in filenames:
                        path = os.path.join(directory, f)
                        adds.append(path)
            elif os.path.isfile(path):
                adds.append(path)
        
        # add all files within root path
        elif path is None and all:
            # walk the root directory
            for directory, dirnames, filenames in os.walk(self.root):
                if '.git' in dirnames:
                    # don't traverse the .git subdir 
                    dirnames.remove('.git')
                for f in filenames:
                    path = os.path.join(directory, f)
                    adds.append(path)

        # add file at path
        elif path is not None:
            # add only if file
            if os.path.isfile(path):
                adds.append(path)

        # back to relative paths, so we can add them to the tree.
        rels = []
        for p in adds:
            # get the path relative to repo root.
            rels.append(os.path.relpath(p, self.root))
        adds = rels

        # filter unmodified files (and untracked files if not add_new_files)
        if add_new_files:
            adds = [f for f in adds if self._file_is_modified(f) or \
                    not self._file_in_tree(f)]
        else:
            adds = [f for f in adds if self._file_is_modified(f)]

        # don't waste time with stage if empty list.
        if adds:
            self.repo.stage(adds)

        return adds

    def branch(self, name=None, ref=None):
        """
        Create a new branch or display the current one. Equivalent to 
        `git branch`.
        
        :param name: the name of the branch
        :param ref: a commit reference (branch, tag, or SHA). Same idea 
                    as the git-branch ``--start-point`` option. Will 
                    create the branch off of the commit. Defaults to HEAD.
        :return: None on create, branch name on display.
        
        When the name param is not given, the current branch will be
        returned as a string using the branch's full name
        (i.e. ``refs/heads/[branch_name]``).
        """
        # create a branch
        if name is not None:
            if ref is None:
                ref = self.head().id
            else:
                ref = self._resolve_ref(ref)
            self.repo.refs['refs/heads/%s' % name] = ref
        # display the name of the current branch
        else:
            # couldn't find an easy way to get it out of dulwich, 
            # which resolves HEAD to the commit, so we'll just read 
            # .git/HEAD directly.
            path = os.path.join(self.repo._controldir, 'HEAD')
            if os.path.isfile(path):
                with open(path, 'r') as fp:
                    return fp.read().strip()[5:]

    def checkout(self, ref, path=None):
        """
        Checkout the entire tree (or a subset) of a commit given a branch, 
        tag, or commit SHA.

        This is a fairly naive implementation. It will just write the blob data
        recursively from the tree pointed at by the given reference, 
        overwriting the working tree as necessary. It doesn't do deletions or 
        renames.

        If you wanted to checkout 'HEAD':
          >>> repo.checkout(repo.head())

        If you wanted to checkout the master branch:
          >>> repo.checkout('master')

        If you wanted to checkout v1.2 (i.e. a tag):
          >>> repo.checkout('v1.2')

        :param ref: branch, tag, or commit
        :param path: checkout only file or directory at path, should be
                     relative to the repo's root. 
        :raises KeyError: if bad reference.
        """
        sha = self._resolve_ref(ref)
        obj = self.repo[sha]
        tree = self.repo[obj.tree]

        if tree is None:
            raise KeyError('Bad reference: %s' % ref)
        if path is None:
            path = self.root

        else:
            # check if path and self.root are same
            if not os.path.samefile(path, self.root):
                # if not, we need the path's tree 
                # (a sub-tree of the commit tree)
                tree = self._obj_from_tree(tree, path)
        
        # write the tree
        self._write_tree_to_wt(tree, path)

    def cmd(self, cmd):
        """
        Run a raw git command from the shell and return any output. Unlike 
        other methods (which depend on Dulwich's git reimplementation and 
        not git itself), this is dependent on the git shell command. 

        The given git subcommand and arguments are prefixed with ``git`` and
        run through the subprocess module.

        To maintain the class's indifference to the current working directory,
        we also prepend the ``--git-dir`` and ``--work-tree`` arguments. 

        :param cmd: A list of command-line arguments (anything the subprocess 
                    module will take).
        :return: a string containing the command's output.

        **Usage** (output has been truncated for brevity):
          >>> repo.cmd(['checkout', '-q', 'master'])
          >>> repo.cmd(['commit', '-q', '-a', '-m', 'Initial Commit'])
          >>> repo.cmd(['remote', '-v'])
          "origin  [email protected]:hopper.git (fetch)\\n\\n origin ..."
          >>> repo.cmd(['log'])
          "commit 68a116eaee458607a3a9cf852df4f358a02bdb92\\nAuthor: Ni..."

        As you can see, it doesn't do any parsing of the output. It's available
        for times when the other methods don't get the job done.
        """

        if not type(cmd) is list:
            raise TypeError('cmd must be a list')
        git_dir = os.path.join(self.root, '.git')
        prefix = ['git', '--git-dir', git_dir, '--work-tree', self.root]
        # It would be nice to use check_output() here, but it's 2.7+
        return subprocess.Popen(prefix + cmd, stdout=subprocess.PIPE).communicate()[0]

    def commit(self, all=False, **kwargs):
        """
        Commit the changeset to the repository.  Equivalent to the 
        `git commit` command.

        This method does a commit; use the ``commits`` method to 
        retrieve one or more commits.

        Uses ``dulwich.objects.BaseRepo.do_commit()``, see that for
        params. At minimum, you need to provide **committer** and 
        **message**. Everything else will be defaulted.

        :param all: commit all modified files that are already being tracked.
        :param \*\*kwargs: the commit attributes (e.g. committer, message,
                         etc.). Again, see the underlying dulwich method.
        """
        
        if all:
            # add all changes (to already tracked files)
            self.add(all=True, add_new_files=False)

        # pass the kwargs to dulwich, get the returned commit id.
        commit_id = self.repo.do_commit(**kwargs)

        # return the Commit object (instead of the id, which is less useful).
        return self.repo[commit_id]

    def commits(self, ref=None, n=10):
        """
        Return up to n-commits down from a ref (branch, tag, commit),
        or if no ref given, down from the HEAD.

        If you just want a single commit, it may be cleaner to use the
        ``object`` method.

        :param ref: a branch, tag (not yet), or commit SHA to use 
                          as a start point.
        :param n: the maximum number of commits to return. If fewer 
                  matching commits exist, only they will be returned.

        :return: a list of ``dulwich.objects.Commit`` objects.

        **Usage**:
          >>> repo.commits()
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>,
           <Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
          >>> repo.commits()[0] == repo.head()
          True
          >>> repo.commits(n=1)
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>]
          >>> repo.commits('6336f47615da32d520a8d52223b9817ee50ca728', n=1)
          [<Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
        """

        start_point = self.head().id
        if ref is not None:
            start_point = self._resolve_ref(ref)
        return self.repo.revision_history(start_point)[:n]

    def diff(self, a, b=None, path=None):
        """
        Return a diff of commits a and b.

        :param a: a commit identifier.
        :param b: a commit identifier. Defaults to HEAD.
        :param path: a path to a file or directory to diff, relative
                     to the repo root. Defaults to the entire tree.
        """
        if not os.path.isfile(os.path.join(self.root, path)):
            raise NotImplementedError('Specify a file path for now')
        return self._diff_file(path, a, b)

    def head(self):
        """Return the HEAD commit or raise an error."""
        # It seems best to make this a function so we don't have to
        # set and continually update it.
        try:
            return self.repo['HEAD']
        except KeyError:
            # The HEAD will be missing before the repo is committed to.
            raise NoHeadSet

    def is_dirty(self):
        """Return True if there are uncommitted changes to the repository."""
        new, modified, deleted = self.status()
        if new or modified or deleted:
            return True
        return False

    def object(self, sha):
        """
        Retrieve an object from the repository.

        :param sha: the 40-byte hex-rep of the object's SHA1 identifier.
        """
        return self.repo[sha]

    def status(self, from_path=None):
        """
        Compare the working directory with HEAD.

        Only files present in the working tree are inspected, so every
        candidate is classified through ``_file_status``.

        :param from_path: show changes within this path, which must be a
                          file or directory relative to the repo.
        :return: a tuple containing three lists: new, modified, deleted.
                 All paths are relative to the repository root.
        :raises OSError: if **from_path** does not exist.
        """
        # TODO: also compare the index and HEAD, or the index and WT

        # use from_path if set, otherwise root.
        if from_path is not None:
            path = os.path.join(self.root, from_path)
            if not os.path.exists(path):
                raise OSError('from_path does not exist.')
        else:
            path = self.root

        # collect the candidate files as absolute paths
        candidates = []
        if os.path.isfile(path):
            candidates.append(path)
        elif os.path.isdir(path):
            for directory, dirnames, filenames in os.walk(path):
                if '.git' in dirnames:
                    # pruning in place keeps os.walk out of .git
                    dirnames.remove('.git')
                candidates.extend(os.path.join(directory, f) for f in filenames)

        changes = {FILE_IS_NEW: [], FILE_IS_MODIFIED: [], FILE_IS_DELETED: []}
        for candidate in candidates:
            # _file_status expects repo-relative paths. The single-file case
            # used to pass (and report) the absolute path, unlike the
            # directory case.
            fpath = os.path.relpath(candidate, self.root)
            bucket = changes.get(self._file_status(fpath))
            if bucket is not None:
                bucket.append(fpath)

        return changes[FILE_IS_NEW], changes[FILE_IS_MODIFIED], changes[FILE_IS_DELETED]

    def tag(self, name, ref=None):
        """
        Create a tag.

        :param name: name of the new tag (e.g. 'v1.0' or '1.0.6')
        :param ref: a commit ref to tag, defaults to HEAD.
        """
        # TODO: display tags attached to HEAD when no args.
        if ref is None:
            ref = self.head().id
        ref = self._resolve_ref(ref)
        self.repo.refs['refs/tags/%s' % name] = ref

    def tree(self, sha=None):
        """
        Return the tree with given SHA, or the HEAD commit's tree when no
        SHA is given.

        :param sha: tree reference.
        :raises NotTreeError: if the object found is not a ``Tree``.

        Note that a commit reference would not work. To get a commit's
        tree, just provide ``c.tree``, which contains the SHA we need.
        """
        tree_sha = self.head().tree if sha is None else sha
        candidate = self.repo[tree_sha]
        if type(candidate) is not Tree:
            raise NotTreeError('Object is not a Tree')
        return candidate

    def _file_status(self, path, ref=None):
        """
        Classify a file by comparing the working tree with the HEAD tree.

        The result is one of the module constants::

        FILE_IS_UNCHANGED = 0
        FILE_IS_NEW       = 1
        FILE_IS_MODIFIED  = 2
        FILE_IS_DELETED   = 3

        :param path: file path relative to the repo
        :param ref: accepted for a future "compare with this ref" feature;
                    currently unused.
        :return: status constant
        :raises KeyError: if the path exists in neither tree.
        """
        on_disk = os.path.exists(os.path.join(self.root, path))
        tracked = self._file_in_tree(path)

        if not tracked:
            if on_disk:
                # only in the working tree
                return FILE_IS_NEW
            # does not exist (at least in our 2-tree world)
            raise KeyError('Path not found in either tree.')

        if not on_disk:
            # only in the commit tree
            return FILE_IS_DELETED
        if self._file_is_modified(path):
            return FILE_IS_MODIFIED
        return FILE_IS_UNCHANGED

    def _file_is_modified(self, path, ref=None):
        """
        Returns True if the current file (in the WT) has been modified from
        the blob in the HEAD commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: accepted for a future "compare with this ref" feature;
                    currently unused.

        This returns False for new files (not present in the tree) and when
        no HEAD is set yet. If that is unexpected, call ``_file_in_tree``
        first.

        It assumes that the given path does exist. Just expect an OSError
        if it doesn't.
        """
        # handle no head scenario when this gets called before first commit
        try:
            head = self.head()
        except NoHeadSet:
            return False

        # get the blob from the HEAD tree
        tree = self.repo[head.tree]
        blob1 = self._obj_from_tree(tree, path)
        if type(blob1) is not Blob:
            return False

        # make a second blob from the current file. Read it as bytes:
        # text mode would translate newlines or decode the content, so the
        # data would no longer match what the stored blob was built from.
        with open(os.path.join(self.root, path), 'rb') as fp:
            blob2 = Blob.from_string(fp.read())

        # calls dulwich.objects.ShaFile.__eq__, which just compares SHAs
        return blob1 != blob2

    def _file_in_tree(self, path, ref=None):
        """
        Tell whether ``path`` resolves to an object in the HEAD commit's
        tree.

        :param path: path to the file relative to the repository root.
        :param ref: currently unused; the lookup is always made against HEAD.
        :return: True if ``_obj_from_tree`` finds something at ``path``,
                 False if it finds nothing or if there is no HEAD yet.
        """
        # Before the first commit there is no HEAD, so nothing is tracked.
        try:
            self.head()
        except NoHeadSet:
            return False

        head_tree = self.repo[self.head().tree]
        found = self._obj_from_tree(head_tree, path)
        return found is not None

    def _apply_to_tree(self, tree, f, path=None):
        """
        Walk a tree recursively and apply function, f, to each entry.

        :param tree: a dulwich.objects.Tree object
        :param f: function that will be called with each entry. It is
                  called as ``f(entry, path)`` when **path** is truthy and
                  as ``f(entry)`` otherwise.
        :param path: if provided, the path (relative to the repository) of
                     the tree being walked. Sub-trees are walked with this
                     path joined with their own entry name.
        :raises NotTreeError: if **tree** is not a Tree.
        """
        if type(tree) is not Tree:
            raise NotTreeError
        for entry in tree.iteritems():
            if path:
                f(entry, path)
            else:
                f(entry)
            obj = self.repo[entry.sha]
            if type(obj) is Tree:
                # Extend the path with the entry's name. (Joining with the
                # callback itself, as was done before, raises as soon as a
                # sub-tree is reached.)
                new_path = os.path.join(path, entry.path) if path else None
                self._apply_to_tree(obj, f, new_path)

    def _obj_from_tree(self, tree, path):
        """
        Walk a tree recursively to retrieve and return a blob or sub-tree
        from the given path, or return None if one does not exist.

        :param tree: a dulwich.objects.Tree object.
        :param path: path relative to the repository root. Trailing
                     separators are ignored.

        :return: Tree object, Blob object, or None if the path could
                 not be found (an empty path is never found).
        :raises NotTreeError: if **tree** is not a Tree.

        For example, providing ``hopper/git.py`` would return the
        ``git.py`` blob within the ``hopper`` sub-tree.
        """
        if type(tree) is not Tree:
            raise NotTreeError('Object is not a tree')

        # remove trailing slashes from path (so the last component isn't '')
        path = path.rstrip(os.sep)
        if not path:
            return None

        # Split off the first component, which is either the file itself or
        # a directory. The remainder is always computed, so directories with
        # one-character names can be descended into as well.
        parts = path.split(os.sep, 1)
        head = parts[0]
        is_last_component = len(parts) == 1

        for entry in tree.iteritems():
            # these are dulwich.objects.TreeEntry objects
            if entry.path == head:
                # get the Tree or Blob.
                obj = self.repo[entry.sha]
                # return if we're at the right path
                if is_last_component:
                    return obj
                # otherwise recurse if it's a Tree
                elif type(obj) is Tree:
                    return self._obj_from_tree(obj, parts[1])

        # if we get here the path wasn't there.
        return None

    def _write_tree_to_wt(self, tree, basepath):
        """
        Walk a tree recursively and write each blob's data to the working
        tree.

        :param tree: a dulwich.objects.Tree object.
        :param basepath: blob data is written to:
                         ``os.path.join(basepath, blob_path)``.
                         Recursive calls will append the sub-tree
                         name to the original call. The directory is
                         created if it does not exist yet.
        :raises NotTreeError: if **tree** is not a Tree.

        Entries that are neither blobs nor trees are ignored.
        """
        if type(tree) is not Tree:
            raise NotTreeError('Object is not a tree')

        # A sub-tree may not have a directory in the working tree yet;
        # without this, opening the first blob inside it would fail.
        if not os.path.isdir(basepath):
            os.makedirs(basepath)

        for entry in tree.iteritems():
            obj = self.repo[entry.sha]
            target = os.path.join(basepath, entry.path)
            if type(obj) is Blob:
                with open(target, 'wb') as fp:
                    fp.write(obj.data)
            elif type(obj) is Tree:
                self._write_tree_to_wt(obj, target)

    def _resolve_ref(self, ref):
        """
        Turn a branch name, tag name, or commit id into a commit SHA.

        Lookup order is branch, then tag, then raw object id, so a branch
        wins over a tag of the same name.

        :param ref: branch, tag, commit reference.
        :return: the ``id`` of the object the reference resolves to.
        :raises KeyError: if ref doesn't point to a commit.
        :raises TypeError: if ref is not a string.
        """
        if type(ref) is not str:
            raise TypeError('ref must be a string')

        # dulwich.Repo.refs is keyed by the full name
        # (i.e. 'refs/heads/master'), so expand the short name both ways.
        candidates = (_expand_branch_name(ref), _expand_tag_name(ref))
        known_refs = self.repo.refs
        for full_name in candidates:
            if full_name in known_refs:
                return self.repo[full_name].id

        # Neither a branch nor a tag: treat ref as an object id.
        candidate = self.repo[ref]
        if type(candidate) is not Commit:
            raise KeyError('Bad reference: %s' % ref)
        return candidate.id

    def _diff_file(self, path, a, b=None, html=False):
        """
        Produce a difflib context diff of one file between two commits, or
        between a commit and the working tree.

        :param path: path to file, relative to repo root.
        :param a: ref to commit a.
        :param b: ref to commit b, defaults to the working tree.
        :param html: accepted but not used by the current implementation.
        :return: the context diff lines joined with newlines.
        :raise NotBlobError: if **b** is given and the path is a blob in
                             neither of the two trees.

        A side without a blob at **path** is diffed as an empty string.
        """
        # "a" side: blob data from the commit's tree, or '' when absent
        sha_a = self._resolve_ref(a)
        tree_a = self.repo[self.repo[sha_a].tree]
        blob_a = self._obj_from_tree(tree_a, path)
        a_is_blob = type(blob_a) is Blob
        if a_is_blob:
            old_text = blob_a.data
        else:
            old_text = ''

        if b is not None:
            # "b" side comes from a second commit
            sha_b = self._resolve_ref(b)
            tree_b = self.repo[self.repo[sha_b].tree]
            blob_b = self._obj_from_tree(tree_b, path)
            b_is_blob = type(blob_b) is Blob
            new_text = blob_b.data if b_is_blob else ''
            # if both blobs were missing => bad path
            if not (a_is_blob or b_is_blob):
                raise NotBlobError('Path did not point to a blob in either tree')
        else:
            # "b" side is the file as it currently sits in the working tree
            with open(os.path.join(self.root, path), 'r') as fp:
                new_text = fp.read()

        delta = difflib.context_diff(old_text.splitlines(),
                                     new_text.splitlines())
        return '\n'.join(delta)
예제 #36
0
class Wiki(HookMixin):
    """
    A wiki stored in a git repository, accessed through a Dulwich ``Repo``.

    The repository at ``path`` is opened on construction, or initialised
    (creating the directory) when no git repository exists there yet.
    """
    # Filesystem path of the repository; set in ``__init__``.
    path = None
    # NOTE(review): the defaults below are not read inside this class;
    # presumably they are consumed by callers or the mixin -- confirm.
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    # The Dulwich ``Repo`` instance; set in ``__init__``.
    repo = None

    def __init__(self, path):
        """Open the git repository at ``path``, creating it if missing.

        :param path: Directory of the wiki's git repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        Text arguments are encoded as UTF-8; the same identity is used for
        both author and committer.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')
        # Assemble the identity directly from the encoded bytes. The former
        # ``"%s <%s>".format(...)`` has no ``{}`` fields, so it always
        # produced the literal text "%s <%s>" instead of the real identity.
        author = committer = name + b" <" + email + b">"
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: a ``WikiPage`` built from ``name``, this wiki and ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get the entries of the repository index.

        :return: list -- one dict per index entry with the keys ``name``,
                 ``filename``, ``ctime``, ``mtime``, ``sha`` and ``size``

        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            # look the entry up once instead of once per field
            entry = index[name]
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     ctime=entry.ctime[0],
                     mtime=entry.mtime[0],
                     sha=entry.sha,
                     size=entry.size))

        return rv
예제 #37
0
class Wiki(HookMixin):
    """
    A wiki stored in a git repository, accessed through a Dulwich ``Repo``.

    The repository at ``path`` is opened on construction, or initialised
    (creating the directory) when no git repository exists there yet.
    """
    # Filesystem path of the repository; set in ``__init__``.
    path = None
    # NOTE(review): the defaults below are not read inside this class;
    # presumably they are consumed by callers or the mixin -- confirm.
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    # The Dulwich ``Repo`` instance; set in ``__init__``.
    repo = None

    def __init__(self, path):
        """Open the git repository at ``path``, creating it if missing.

        :param path: Directory of the wiki's git repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: {0}".format(self.path)

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        Text arguments are encoded as UTF-8; the same identity is used for
        both author and committer.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')
        # name and email are bytes at this point. Feeding them through
        # str.format() renders them as "b'...'" on Python 3 (and forces an
        # implicit ASCII decode on Python 2), so join the bytes directly.
        author = committer = name + b" <" + email + b">"
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: a ``WikiPage`` built from ``name``, this wiki and ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get the entries of the repository index.

        :return: list -- one dict per index entry with the keys ``name``,
                 ``filename``, ``ctime``, ``mtime``, ``sha`` and ``size``

        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            # look the entry up once instead of once per field
            entry = index[name]
            rv.append(dict(name=filename_to_cname(name),
                           filename=name,
                           ctime=entry.ctime[0],
                           mtime=entry.mtime[0],
                           sha=entry.sha,
                           size=entry.size))

        return rv
예제 #38
0
class BaseFetcher():
    """
    Base class for fetchers that download source files and can optionally
    keep them in a local git repository.

    ``_name`` is not assigned anywhere in this class, so it has to be
    provided elsewhere (presumably by the subclass) before ``__init__`` is
    called with a parent directory, because the per-fetcher directories are
    named after ``self.name``.
    """

    def __init__(self, temp_parent_dir=None, repo_parent_dir=None):
        """
        Set up the optional temp directory and the optional local repo.

        :param temp_parent_dir: if given, ``<temp_parent_dir>/<name>`` is
            created (when missing) and stored as ``_temp_location``.
        :param repo_parent_dir: if given, ``<repo_parent_dir>/<name>`` is
            created (when missing) and the repo in it is loaded, or
            initialized when none exists yet. If omitted the fetcher has no
            repo (``has_repo`` is False and ``repo`` is None).
        """
        if temp_parent_dir is not None:
            self._temp_location = temp_parent_dir + "/" + self.name
            Path(self._temp_location).mkdir(exist_ok=True)

        if repo_parent_dir is not None:
            self._repo_location = repo_parent_dir + "/" + self.name
            Path(self._repo_location).mkdir(exist_ok=True)
            try:
                self.repo_load()
            except ValueError:
                # repo_load raises ValueError when there is no .git directory
                self.repo_initialize()
        else:
            self.has_repo = False
            self.repo = None

    @property
    def name(self):
        """Name of the fetcher, used for its temp and repo directories."""
        return self._name

    @property
    def repo_location(self):
        """Directory of the local repo (only set when a repo parent was given)."""
        return self._repo_location

    def retrieve_metadata(self):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def retrieve_data(self, dataset):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def update_data(self, from_date):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def download_file(self, url, params, stream=False, local_repo_file=None):
        """
        Downloads a file and returns a BytesIO buffer.
        Specify if download needs to happen on a stream, default False.
        If the fetcher has a local repo, the local_repo_file string can be
        provided to allow storing the file in the repo (relative file path to
        the repo base path). If no local_repo_file is provided (None), the file
        is NOT stored in the repo regardless of the fetcher having a repo or not.

        :param url: URL to request.
        :param params: passed to ``requests.get`` as its second argument.
        :param stream: passed to ``requests.get`` as ``stream``.
        :param local_repo_file: '/'-separated path relative to the repo
            base; when stored, the file is also staged in the repo.
        :return: BytesIO positioned at the start of the downloaded content.
        :raises RuntimeError: if the response status code is not 200.
        """
        log.info("Downloading file from url: {} - params: {}".format(
            url, params))
        rsp = requests.get(url, params, stream=stream)
        if rsp.status_code != 200:
            # Nothing will read the body, so release the connection
            # explicitly (this matters when stream=True).
            rsp.close()
            raise RuntimeError(
                "Downloading the requested file failed with response status {}."
                .format(rsp.status_code))

        fb = BytesIO()
        for chunk in rsp.iter_content(chunk_size=None):
            if chunk:
                fb.write(chunk)
        fb.seek(0)

        if self.has_repo and local_repo_file is not None:
            log.info("Saving to local file {}".format(local_repo_file))
            ds = local_repo_file.split('/')
            file_name = ds[-1]
            file_sublocation = "/".join(ds[0:-1])
            dir_path = Path(self._repo_location + '/' + file_sublocation)
            dir_path.mkdir(parents=True, exist_ok=True)
            file_path = dir_path.joinpath(file_name)
            with open(file_path, 'wb') as f:
                # getvalue() copies the whole buffer without moving the
                # read position, so the caller still gets it at offset 0
                f.write(fb.getvalue())
            self.repo.stage([local_repo_file])
        return fb

    def repo_initialize(self):
        """
        Initializes a repo on disk where source data should be stored.
        The subdirectory of parent_location is based off the name of the
        Fetcher.

        :raises ValueError: if the location already contains a ``.git``
            directory.
        """
        pth = Path(self.repo_location)
        gitpth = Path(self.repo_location + '/.git')
        if not pth.is_dir():
            pth.mkdir()
        if gitpth.is_dir():
            raise ValueError("Requested location already contains a repo.")
        self.repo = Repo.init(self.repo_location)
        self.has_repo = True

    def repo_load(self):
        """
        Loads an existing repo on disk.
        The repo should be located in the subdirectory of parent_location
        based off the name of the Fetcher.

        :raises ValueError: if the location has no ``.git`` directory.
        """
        gitpth = Path(self.repo_location + '/.git')
        if gitpth.is_dir():
            self.repo = Repo(self.repo_location)
            self.has_repo = True
        else:
            raise ValueError("Requested repo does not exist.")

    def repo_commit(self, msg):
        """
        Commit to the local repo with the given message.

        :param msg: commit message (text); it is encoded as UTF-8.
        :return: whatever ``self.repo.do_commit`` returns.
        :raises RuntimeError: if the fetcher has no repo.
        """
        if self.has_repo:
            return self.repo.do_commit(msg.encode('UTF-8'))
        else:
            raise RuntimeError("Repo not initialized / loaded.")
예제 #39
0
class Repo(object):
    """
    An abstraction layer on top of dulwich.repo.Repo for higher-level
    git repository actions like:

    * adding only modified files
    * checking out whole trees (or paths within them) from refs
    * diffs with difflib
    * branching and tagging (both displaying and creating)
    * listing commits down from a ref

    Methods are structured to match git commands when appropriate.

    It also supports executing arbitrary git commands, if git is installed.
    Of course, everything else is implemented in pure python, so having git
    installed is optional.

    Should be considered a work-in-progress.
    """
    def __init__(self, path):
        """
        Open the repository located at **path**.

        :param path: passed to ``DulwichRepo`` and kept as ``self.root``.
        """
        # The inner Dulwich Repo object that the other methods delegate to.
        dulwich_repo = DulwichRepo(path)
        self.repo = dulwich_repo
        self.root = path

    @classmethod
    def init(cls, path, mkdir=False, bare=False):
        """
        Initializes a normal or bare repository. This is mostly a
        handoff to Dulwich.

        :param path: the path (which must be a directory) to create
                     the repository within.
        :param mkdir: if True, make a directory at **path**. Equivalent
                      to ``mkdir [path] && cd [path] && git init``.
        :param bare: if True, create a bare repository at the path.

        :return: a ``Repo`` instance.
        """
        if bare:
            # init_bare is called with the path only, so honor mkdir here;
            # previously the flag was silently ignored for bare repos. An
            # already existing directory is left alone so that calls which
            # used to work keep working.
            if mkdir and not os.path.isdir(path):
                os.mkdir(path)
            DulwichRepo.init_bare(path)
        else:
            DulwichRepo.init(path, mkdir)
        return cls(path)

    def add(self, path=None, all=False, add_new_files=True):
        """
        Add files to the repository or staging area if new or modified. 
        Equivalent to the ``git add`` command. 

        :param path: the path to the file to add, relative to the
            repository root. 
        :param all: if True, add all files under the given path. If 
            **path** is omitted, the repository's root path will be used.
        :param add_new_files: if True, this command will also add new
            files. Note this is the default behavior. The option is 
            provided for situations (e.g. ``git commit -a``) where adding
            new files would be undesirable.

        :return: list of filepaths that were added.
                   
        If **path** is a file and **all** is True, only the single 
        file will be added.
        If **path** is a directory and **all** is False, nothing 
        will be added.
        Likewise, if both **path** and **all** are omitted, nothing 
        will be added.        

        Additionally, the ``add`` method checks to see if the path(s)
        have been modified. We don't want to create new blobs if we 
        don't need them.
        """

        # the implementation creates a list of paths and stages them using
        # dulwich.Repo.stage

        # Paths are a little tricky. To work with repositories independent
        # of the current working directory, we need absolute paths to files.
        # At the same time, git trees are relative to the repository root.
        # So, we have to do a few conversions.

        adds = []

        # get an absolute path for doing isfile/isdir checks.
        if path is not None:
            path = os.path.join(self.root, path)

        # add all files within given path
        if path is not None and all:
            if os.path.isdir(path):
                # walk the directory
                for directory, dirnames, filenames in os.walk(directory):
                    if '.git' in dirnames:
                        # in case path is root, don't traverse the .git subdir
                        dirnames.remove('.git')
                    for f in filenames:
                        path = os.path.join(directory, f)
                        adds.append(path)
            elif os.path.isfile(path):
                adds.append(path)

        # add all files within root path
        elif path is None and all:
            # walk the root directory
            for directory, dirnames, filenames in os.walk(self.root):
                if '.git' in dirnames:
                    # don't traverse the .git subdir
                    dirnames.remove('.git')
                for f in filenames:
                    path = os.path.join(directory, f)
                    adds.append(path)

        # add file at path
        elif path is not None:
            # add only if file
            if os.path.isfile(path):
                adds.append(path)

        # back to relative paths, so we can add them to the tree.
        rels = []
        for p in adds:
            # get the path relative to repo root.
            rels.append(os.path.relpath(p, self.root))
        adds = rels

        # filter unmodified files (and untracked files if not add_new_files)
        if add_new_files:
            adds = [f for f in adds if self._file_is_modified(f) or \
                    not self._file_in_tree(f)]
        else:
            adds = [f for f in adds if self._file_is_modified(f)]

        # don't waste time with stage if empty list.
        if adds:
            self.repo.stage(adds)

        return adds

    def branch(self, name=None, ref=None):
        """
        Create a new branch or display the current one. Equivalent to 
        `git branch`.
        
        :param name: the name of the branch
        :param ref: a commit reference (branch, tag, or SHA). Same idea 
                    as the git-branch ``--start-point`` option. Will 
                    create the branch off of the commit. Defaults to HEAD.
        :return: None on create, branch name on display.
        
        When the name param is not given, the current branch will be
        returned as a string using the branch's full name
        (i.e. ``refs/heads/[branch_name]``).
        """
        # create a branch
        if name is not None:
            if ref is None:
                ref = self.head().id
            else:
                ref = self._resolve_ref(ref)
            self.repo.refs['refs/heads/%s' % name] = ref
        # display the name of the current branch
        else:
            # couldn't find an easy way to get it out of dulwich,
            # which resolves HEAD to the commit, so we'll just read
            # .git/HEAD directly.
            path = os.path.join(self.repo._controldir, 'HEAD')
            if os.path.isfile(path):
                with open(path, 'r') as fp:
                    return fp.read().strip()[5:]

    def checkout(self, ref, path=None):
        """
        Checkout the entire tree (or a subset) of a commit given a branch, 
        tag, or commit SHA.

        This is a fairly naive implementation. It will just write the blob data
        recursively from the tree pointed at by the given reference, 
        overwriting the working tree as necessary. It doesn't do deletions or 
        renames.

        If you wanted to checkout 'HEAD':
          >>> repo.checkout(repo.head())

        If you wanted to checkout the master branch:
          >>> repo.checkout('master')

        If you wanted to checkout v1.2 (i.e. a tag):
          >>> repo.checkout('v1.2')

        :param ref: branch, tag, or commit
        :param path: checkout only file or directory at path, should be
                     relative to the repo's root. 
        :raises KeyError: if bad reference.
        """
        sha = self._resolve_ref(ref)
        obj = self.repo[sha]
        tree = self.repo[obj.tree]

        if tree is None:
            raise KeyError('Bad reference: %s' % ref)
        if path is None:
            path = self.root

        else:
            # check if path and self.root are same
            if not os.path.samefile(path, self.root):
                # if not, we need the path's tree
                # (a sub-tree of the commit tree)
                tree = self._obj_from_tree(tree, path)

        # write the tree
        self._write_tree_to_wt(tree, path)

    def cmd(self, cmd):
        """
        Run a raw git command from the shell and return any output. Unlike 
        other methods (which depend on Dulwich's git reimplementation and 
        not git itself), this is dependent on the git shell command. 

        The given git subcommand and arguments are prefixed with ``git`` and
        run through the subprocess module.

        To maintain the class's indifference to the current working directory,
        we also prepend the ``--git-dir`` and ``--work-tree`` arguments. 

        :param cmd: A list of command-line arguments (anything the subprocess 
                    module will take).
        :return: a string containing the command's output.

        **Usage** (output has been truncated for brevity):
          >>> repo.cmd(['checkout', '-q', 'master'])
          >>> repo.cmd(['commit', '-q', '-a', '-m', 'Initial Commit'])
          >>> repo.cmd(['remote', '-v'])
          "origin  [email protected]:hopper.git (fetch)\\n\\n origin ..."
          >>> repo.cmd(['log'])
          "commit 68a116eaee458607a3a9cf852df4f358a02bdb92\\nAuthor: Ni..."

        As you can see, it doesn't do any parsing of the output. It's available
        for times when the other methods don't get the job done.
        """

        if not type(cmd) is list:
            raise TypeError('cmd must be a list')
        git_dir = os.path.join(self.root, '.git')
        prefix = ['git', '--git-dir', git_dir, '--work-tree', self.root]
        # It would be nice to use check_output() here, but it's 2.7+
        return subprocess.Popen(prefix + cmd,
                                stdout=subprocess.PIPE).communicate()[0]

    def commit(self, all=False, **kwargs):
        """
        Commit the changeset to the repository.  Equivalent to the 
        `git commit` command.

        This method does a commit; use the ``commits`` method to 
        retrieve one or more commits.

        Uses ``dulwich.objects.BaseRepo.do_commit()``, see that for
        params. At minimum, you need to provide **committer** and 
        **message**. Everything else will be defaulted.

        :param all: commit all modified files that are already being tracked.
        :param \*\*kwargs: the commit attributes (e.g. committer, message,
                         etc.). Again, see the underlying dulwich method.
        """

        if all:
            # add all changes (to already tracked files)
            self.add(all=True, add_new_files=False)

        # pass the kwargs to dulwich, get the returned commit id.
        commit_id = self.repo.do_commit(**kwargs)

        # return the Commit object (instead of the id, which is less useful).
        return self.repo[commit_id]

    def commits(self, ref=None, n=10):
        """
        Return up to n-commits down from a ref (branch, tag, commit),
        or if no ref given, down from the HEAD.

        If you just want a single commit, it may be cleaner to use the
        ``object`` method.

        :param ref: a branch, tag (not yet), or commit SHA to use 
                          as a start point.
        :param n: the maximum number of commits to return. If fewer 
                  matching commits exist, only they will be returned.

        :return: a list of ``dulwich.objects.Commit`` objects.

        **Usage**:
          >>> repo.commits()
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>,
           <Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
          >>> repo.commits()[0] == repo.head()
          True
          >>> repo.commits(n=1)
          [<Commit 6f50a9bcd25ddcbf21919040609a9ad3c6354f1c>]
          >>> repo.commits('6336f47615da32d520a8d52223b9817ee50ca728', n=1)
          [<Commit 6336f47615da32d520a8d52223b9817ee50ca728>]
        """

        start_point = self.head().id
        if ref is not None:
            start_point = self._resolve_ref(ref)
        return self.repo.revision_history(start_point)[:n]

    def diff(self, a, b=None, path=None):
        """
        Return a diff of commits a and b.

        :param a: a commit identifier.
        :param b: a commit identifier. Defaults to HEAD.
        :param path: a path to a file or directory to diff, relative
                     to the repo root. Defaults to the entire tree.
        """
        if not os.path.isfile(os.path.join(self.root, path)):
            raise NotImplementedError('Specify a file path for now')
        return self._diff_file(path, a, b)

    def head(self):
        """Return the HEAD commit or raise an error."""
        # It seems best to make this a function so we don't have to
        # set and continually update it.
        try:
            return self.repo['HEAD']
        except KeyError:
            # The HEAD will be missing before the repo is committed to.
            raise NoHeadSet

    def is_dirty(self):
        """Return True if there are uncommitted changes to the repository."""
        new, modified, deleted = self.status()
        if new or modified or deleted:
            return True
        return False

    def object(self, sha):
        """
        Retrieve an object from the repository.

        :param sha: the 40-byte hex-rep of the object's SHA1 identifier.
        """
        return self.repo[sha]

    def status(self, from_path=None):
        """
        Compare the working directory with HEAD.

        :param from_path: show changes within this path, which must be a
                          file or directory relative to the repo.
        :return: a tuple containing three lists: new, modified, deleted
        """
        # TODO: also compare the index and HEAD, or the index and WT

        # use from_path if set, otherwise root.
        if from_path is not None:
            from_path = os.path.join(self.root, from_path)
            if not os.path.exists(from_path):
                raise OSError('from_path does not exist.')
            path = from_path
        else:
            path = self.root

        # store changes in dictionary
        changes = {}
        changes['new'] = []
        changes['modified'] = []
        changes['deleted'] = []

        # path is a file
        if os.path.isfile(path):
            status = self._file_status(path)
            if status == FILE_IS_NEW:
                changes['new'].append(path)
            elif status == FILE_IS_MODIFIED:
                changes['modified'].append(path)
            elif status == FILE_IS_DELETED:
                changes['deleted'].append(path)

        # path is a directory
        elif os.path.isdir(path):
            for directory, dirnames, filenames in os.walk(path):
                if '.git' in dirnames:
                    dirnames.remove('.git')
                for f in filenames:
                    fpath = os.path.relpath(os.path.join(directory, f),
                                            self.root)
                    status = self._file_status(fpath)
                    if status == FILE_IS_NEW:
                        changes['new'].append(fpath)
                    elif status == FILE_IS_MODIFIED:
                        changes['modified'].append(fpath)
                    elif status == FILE_IS_DELETED:
                        changes['deleted'].append(fpath)

        return changes['new'], changes['modified'], changes['deleted']

    def tag(self, name, ref=None):
        """
        Create a tag.

        :param name: name of the new tag (e.g. 'v1.0' or '1.0.6')
        :param ref: a commit ref to tag, defaults to HEAD.
        """
        # TODO: display tags attached to HEAD when no args.
        if ref is None:
            ref = self.head().id
        ref = self._resolve_ref(ref)
        self.repo.refs['refs/tags/%s' % name] = ref

    def tree(self, sha=None):
        """
        Fetch a tree by SHA; without a SHA, fetch the HEAD commit's tree.

        :param sha: tree reference.
        :return: the Tree object.
        :raises NotTreeError: if the object found is not a Tree.

        Note that a commit reference would not work. To get a commit's
        tree, just provide ``c.tree``, which contains the SHA we need.
        """
        key = self.head().tree if sha is None else sha
        candidate = self.repo[key]
        if type(candidate) is not Tree:
            raise NotTreeError('Object is not a Tree')
        return candidate

    def _file_status(self, path, ref=None):
        """
        Checks the status of a file in the working tree relative to a
        commit (usually HEAD). Statuses include: new, modified, and deleted.

        These statuses are conveyed as constants::

        FILE_IS_UNCHANGED = 0
        FILE_IS_NEW       = 1
        FILE_IS_MODIFIED  = 2
        FILE_IS_DELETED   = 3

        :param path: file path relative to the repo
        :param ref: optional ref to compare the WT with, default is HEAD.
        :return: status constant
        """
        full_path = os.path.join(self.root, path)
        in_work_tree = os.path.exists(full_path)
        in_tree = self._file_in_tree(path)

        # new
        if not in_tree and in_work_tree:
            return FILE_IS_NEW
        # deleted
        elif in_tree and not in_work_tree:
            return FILE_IS_DELETED
        # modified
        elif in_tree and in_work_tree and self._file_is_modified(path):
            return FILE_IS_MODIFIED
        # unchanged
        elif in_tree and in_work_tree:
            return FILE_IS_UNCHANGED
        # does not exist (at least in our 2-tree world)
        else:
            raise KeyError('Path not found in either tree.')

    def _file_is_modified(self, path, ref=None):
        """
        Returns True if the current file (in the WT) has been modified from
        the blob in the HEAD commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: accepted but currently ignored; the comparison is
                    always made against HEAD.
        :return: True if the working-tree content differs from the blob.

        This returns False for new files (not present in the tree), for
        paths that resolve to something other than a blob, and when there
        is no HEAD yet. If this is unexpected, just call ``_file_in_tree``
        first.

        It assumes that a path found in the tree also exists in the working
        tree. Just expect an IOError/OSError if it doesn't.
        """
        # handle no head scenario when this gets called before first commit
        try:
            head = self.head()
        except NoHeadSet:
            return False

        # get the blob from the HEAD commit's tree
        tree = self.repo[head.tree]
        blob1 = self._obj_from_tree(tree, path)
        if type(blob1) is not Blob:
            return False

        # Make a second blob from the current file. Read raw bytes: a
        # text-mode read may translate newlines (and yields str instead of
        # bytes on Python 3), which would change the computed SHA.
        with open(os.path.join(self.root, path), 'rb') as fp:
            blob2 = Blob.from_string(fp.read())

        # If the contents are the same the two blobs compare equal.
        return blob1 != blob2

    def _file_in_tree(self, path, ref=None):
        """
        Returns True if the file corresponds to a blob in the HEAD 
        commit's tree, False otherwise.

        :param path: path to the file relative to the repository root.
        :param ref: optional ref to compare the WT with, default is HEAD.
        """
        # handle no head scenario when this gets called before first commit
        try:
            self.head()
        except NoHeadSet:
            return False

        # get the tree
        tree = self.repo[self.head().tree]
        if self._obj_from_tree(tree, path) is not None:
            return True
        return False

    def _apply_to_tree(self, tree, f, path=None):
        """
        Walk a tree recursively and apply function, f, to each entry

        :param tree: a dulwich.objects.Tree object
        :param f: function that will be called with each entry.
        :param path: if provided, the path relative to the repository
                     will be included in the function call.
        :raises NotTreeError: if ``tree`` is not a Tree.
        """
        if type(tree) is not Tree:
            raise NotTreeError
        for entry in tree.iteritems():
            if path:
                f(entry, path)
            else:
                f(entry)
            obj = self.repo[entry.sha]
            if type(obj) is Tree:
                # descend using the entry's name (the original joined the
                # callback ``f`` itself, which raised a TypeError)
                new_path = os.path.join(path, entry.path) if path else None
                self._apply_to_tree(obj, f, new_path)

    def _obj_from_tree(self, tree, path):
        """
        Walk a tree recursively to retrieve and return a blob or sub-tree
        from the given path, or return None if one does not exist.

        :param tree: a dulwich.objects.Tree object.
        :param path: path relative to the repository root.

        :return: Tree object, Blob object, or None if the path could
                 not be found (an empty path is treated as not found).
        :raises NotTreeError: if ``tree`` is not a Tree.

        For example, providing ``hopper/git.py`` would return the
        ``git.py`` blob within the ``hopper`` sub-tree.
        """
        if type(tree) is not Tree:
            raise NotTreeError('Object is not a tree')
        # remove trailing slashes from path (so the last component isn't '')
        path = path.rstrip(os.sep)
        if not path:
            return None

        # head is either the file itself or the first directory; the
        # remainder (possibly empty) is what is left for the recursion.
        # Always computing it avoids the unbound ``new_path`` the original
        # hit for single-character directory names.
        head, _, new_path = path.partition(os.sep)

        for entry in tree.iteritems():
            # these are dulwich.objects.TreeEntry objects
            if entry.path == head:
                # get the Tree or Blob.
                obj = self.repo[entry.sha]
                # return if we're at the right path
                if not new_path:
                    return obj
                # otherwise recurse if it's a Tree
                elif type(obj) is Tree:
                    return self._obj_from_tree(obj, new_path)

        # if we get here the path wasn't there.
        return None

    def _write_tree_to_wt(self, tree, basepath):
        """
        Walk a tree recursively and write each blob's data to the working
        tree.

        :param tree: a dulwich.objects.Tree object.
        :param basepath: blob data is written to:
                         ``os.path.join(basepath, blob_path)``.
                         Recursive calls will append the sub-tree
                         name to the original call.
        :raises NotTreeError: if ``tree`` is not a Tree.
        """
        if type(tree) is not Tree:
            raise NotTreeError('Object is not a tree')
        for entry in tree.iteritems():
            obj = self.repo[entry.sha]
            if type(obj) is Blob:
                path = os.path.join(basepath, entry.path)
                with open(path, 'wb') as fp:
                    fp.write(obj.data)
            elif type(obj) is Tree:
                new_basepath = os.path.join(basepath, entry.path)
                # the sub-directory must exist before blobs are written
                # into it, otherwise open() fails for nested files
                if not os.path.isdir(new_basepath):
                    os.makedirs(new_basepath)
                self._write_tree_to_wt(obj, new_basepath)

    def _resolve_ref(self, ref):
        """
        Resolve a reference to a commit SHA.

        :param ref: branch, tag, commit reference.
        :return: a commit SHA.
        :raises KeyError: if ref doesn't point to a commit.
        :raises TypeError: if ref is not a string.
        """
        # order: branch -> tag -> commit
        # (tag and branch can have same name, git assumes branch)

        if type(ref) is not str:
            raise TypeError('ref must be a string')

        # dulwich.Repo.refs keys the full name
        # (i.e. 'refs/heads/master') for branches and tags
        branch = _expand_branch_name(ref)
        tag = _expand_tag_name(ref)

        # branch?
        if branch in self.repo.refs:
            # get the commit SHA that the branch points to
            return self.repo[branch].id
        # tag?
        elif tag in self.repo.refs:
            return self.repo[tag].id
        # commit?
        else:
            obj = self.repo[ref]
            if type(obj) is Commit:
                return obj.id
            else:
                raise KeyError('Bad reference: %s' % ref)

    def _diff_file(self, path, a, b=None, html=False):
        """
        Produce a difflib context diff of one file between two commits, or
        between a single commit and the working tree.

        :param path: path to file, relative to repo root.
        :param a: ref to commit a.
        :param b: ref to commit b, defaults to the working tree.
        :param html: accepted for difflib.HtmlDiff formatting, but this
                     implementation does not consult it.
        :return: the context diff lines joined with newlines.
        :raise NotBlobError: if ``b`` is given and the path is a blob in
                             neither tree.
        """
        # left-hand side: the blob stored in commit a (or nothing)
        sha_a = self._resolve_ref(a)
        old_blob = self._obj_from_tree(self.repo[self.repo[sha_a].tree], path)
        if type(old_blob) is Blob:
            old_text = old_blob.data
        else:
            # empty string means "no blob at path"
            old_text = ''

        if b is not None:
            # right-hand side: the blob stored in commit b
            sha_b = self._resolve_ref(b)
            new_blob = self._obj_from_tree(
                self.repo[self.repo[sha_b].tree], path)
            new_text = new_blob.data if type(new_blob) is Blob else ''
            # if both blobs were missing => bad path
            if type(old_blob) is not Blob and type(new_blob) is not Blob:
                raise NotBlobError(
                    'Path did not point to a blob in either tree')
        else:
            # right-hand side: the file as it currently is on disk
            with open(os.path.join(self.root, path), 'r') as fp:
                new_text = fp.read()

        return '\n'.join(
            difflib.context_diff(old_text.splitlines(), new_text.splitlines()))
예제 #40
0
파일: repo.py 프로젝트: alexmavr/VisTrails
class GitRepo(object):
    """Wrapper around a dulwich ``Repo`` for storing and retrieving
    versioned files and directories.

    Files written to temporary locations by the ``get_*`` methods are
    recorded in ``temp_persist_files``.
    """

    def __init__(self, path):
        """Open the repository at ``path``, initializing it if needed.

        :param path: repository directory; created when it does not exist.
        :raises IOError: if ``path`` exists but is not a directory.
        """
        if os.path.exists(path) and not os.path.isdir(path):
            raise IOError('Git repository "%s" must be a directory.' %
                          path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist
            self.repo = Repo.init(path, not os.path.exists(path))

        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit object for ``version``.

        :raises NotCommitError: if ``version`` is not a commit.
        """
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        """Return ``"tree"`` or ``"blob"`` for the top-level entry ``name``.

        :raises KeyError: if ``name`` is not in the commit's tree.
        """
        commit = self._get_commit(version)

        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        else:
            return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        """Write ``name`` to disk, dispatching on its type.

        :param path_type: ``'tree'`` or ``'blob'``; looked up with
                          ``get_type`` when None.
        :return: the path written by ``get_dir`` or ``get_file``.
        :raises TypeError: for any other ``path_type``.
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)

        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """Write the blob ``blob_sha`` to ``out_fname`` and return the path.

        When ``out_fname`` is None a temporary file is created and
        recorded in ``temp_persist_files``; otherwise missing parent
        directories are created.
        """
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None,
                 out_suffix=''):
        """Write the top-level blob ``name`` to disk and return its path.

        :raises KeyError: if ``name`` is not in the commit's tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        out_fname = self._write_blob(blob_sha, out_fname, out_suffix)
        return out_fname

    def get_dir(self, name, version="HEAD", out_dirname=None,
                out_suffix=''):
        """Write the contents of the top-level tree ``name`` to a directory.

        When ``out_dirname`` is None a temporary directory is created and
        recorded in ``temp_persist_files``.

        :return: the directory the contents were written to.
        :raises KeyError: if ``name`` is not in the commit's tree.
        """
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """Return the object id stored for the top-level entry ``name``.

        ``path_type`` is accepted but not used.

        :raises KeyError: if ``name`` is not in the commit's tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1<<16):
        """Hash the file ``fname`` as a git blob, reading it in chunks.

        :param chunk_size: number of bytes read per chunk.
        :return: the result of ``iter_sha1`` over header + content.
        """
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            def read_chunk():
                return f.read(chunk_size)
            # the file is opened in binary mode, so end-of-file is b'';
            # a '' sentinel never matches on Python 3 and loops forever
            my_iter = chain([head], iter(read_chunk, b''))
            return iter_sha1(my_iter)

    @staticmethod
    def compute_tree_hash(dirname):
        """Hash the directory ``dirname`` recursively as a git tree.

        Entries that are neither directories nor regular files are skipped.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """Hash ``path`` as a tree (directory) or blob (regular file).

        :raises TypeError: if ``path`` is neither.
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit the walker yields for ``path``,
        starting from HEAD."""
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head], max_entries=1,
                        paths=[path])
        # next() builtin works on both Python 2.6+ and Python 3
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """Stage ``filename`` (relative to the repo), recursing into
        directories and skipping symbolic links with a warning."""
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" % os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # the path handed to stage() always uses forward slashes
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage ``filename`` and commit it; return the new commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """Make sure ``user.name`` and ``user.email`` are configured.

        If either lookup raises KeyError, both are written to the
        repository config using the current user.
        """
        config_stack = self.repo.get_config_stack()

        try:
            config_stack.get(('user',), 'name')
            config_stack.get(('user',), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user',), 'name', user)
            repo_conf.set(('user',), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
예제 #41
0
class Gittle(object):
    """Wrapper object built around a ``DulwichRepo`` instance (``self.repo``).

    All paths used in Gittle external methods must be paths relative to the git repository
    """
    # Defaults used when a caller does not supply a value
    DEFAULT_COMMIT = 'HEAD'
    DEFAULT_BRANCH = 'master'
    DEFAULT_REMOTE = 'origin'
    DEFAULT_MESSAGE = '**No Message**'
    DEFAULT_USER_INFO = {
        'name': None,
        'email': None,
    }

    # Diff type name -> function implementing it
    # ('dict' and 'changes' map to the same function)
    DIFF_FUNCTIONS = {
        'classic': utils.git.classic_tree_diff,
        'dict': utils.git.dict_tree_diff,
        'changes': utils.git.dict_tree_diff
    }
    DEFAULT_DIFF_TYPE = 'dict'

    # Regexes for paths that are always hidden; __init__ copies this list
    # and extends the copy with the .gitignore patterns
    HIDDEN_REGEXES = [
        # Hide git directory
        r'.*\/\.git\/.*',
    ]

    # References
    REFS_BRANCHES = 'refs/heads/'
    REFS_REMOTES = 'refs/remotes/'
    REFS_TAGS = 'refs/tags/'

    # Name pattern truths
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    # Each tuple is (old name present, new name present)
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    MODE_DIRECTORY = 0o40000  # Used to tell if a tree entry is a directory

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap an existing repository.

        ``repo_or_path`` may be a dulwich repository, another Gittle
        instance or a path string; anything else raises an Exception.
        Extra positional/keyword arguments are handed to ``self.auth``
        when no ``auth`` object is supplied.
        """
        if isinstance(repo_or_path, DulwichRepo):
            repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            repo = DulwichRepo(os.path.abspath(repo_or_path))
        else:
            logging.warning('Repo is of type %s' % type(repo_or_path))
            raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path')

        self.repo = repo
        # Filesystem location of the repository
        self.path = self.repo.path

        # Remote url and optional client-activity callback
        self.origin_uri = origin_uri
        self._report_activity = report_activity

        # Ignore filter: built-in hidden patterns plus .gitignore patterns
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        ignore_regexes = self._get_ignore_regexes()
        self.hidden_regexes.extend(ignore_regexes)
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Authenticator: use the given one or build it from the extras
        if not auth:
            self.auth(*args, **kwargs)
        else:
            self.authenticator = auth

    def report_activity(self, *args, **kwargs):
        """Forward to the activity callback given at construction, if any."""
        callback = self._report_activity
        if callback:
            return callback(*args, **kwargs)
        return

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        name = userinfo.get('name')
        email = userinfo.get('email')
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return ''.join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """SHA of the current HEAD, as reported by the wrapped repo."""
        head_sha = self.repo.head()
        return head_sha

    @property
    def is_bare(self):
        """The wrapped repo's ``bare`` flag.

        Bare repositories have no working directories or indexes.
        """
        repo = self.repo
        return repo.bare

    @property
    def is_working(self):
        """Negation of ``is_bare``."""
        bare = self.is_bare
        return not bare

    def has_index(self):
        """Whether the wrapped repo reports an index (opposite of is_bare)."""
        repo = self.repo
        return repo.has_index()

    @property
    def has_commits(self):
        """
        True when the repository has a HEAD; a repository whose HEAD lookup
        raises KeyError is considered to have no commits.
        """
        try:
            self.repo.head()
        except KeyError:
            found = False
        else:
            found = True
        return found

    def ref_walker(self, ref=None):
        """
        Minimal walker: lazily yield the commit of every entry produced by
        the repo's walker for ``ref`` (``'HEAD'`` when none is given).
        """
        target = ref if ref else 'HEAD'
        walker = self.repo.get_walker(self._commit_sha(target))
        for walk_entry in walker:
            yield walk_entry.commit

    def branch_walker(self, branch):
        """Walk the commits of ``branch`` (the active branch when falsy)."""
        branch_name = branch or self.active_branch
        return self.ref_walker(self._format_ref_branch(branch_name))

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list of commits with all their attached information.

        The list is sliced as ``[start:end]``; a falsy ``end`` (None or 0)
        means "no upper bound". An empty list is returned when the
        repository has no commits.
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        # Always honour ``start``: it used to be ignored whenever ``end``
        # was not given
        return commits[start:(end or None)]


    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        """Pluck the ``'author'`` field from the first ``n`` commit infos
        of ``branch`` (``n`` falls back to 10 when falsy)."""
        limit = n or 10
        infos = self.commit_info(end=limit, branch=branch)
        return funky.pluck(infos, 'author')

    @property
    def commit_count(self):
        """Number of commits yielded by ``ref_walker()``; 0 on KeyError."""
        try:
            # ref_walker() is a generator, so len() cannot be used on it;
            # count by exhausting it instead
            return sum(1 for _ in self.ref_walker())
        except KeyError:
            return 0

    def commits(self):
        """Return the ``'sha'`` of every entry of ``commit_info()`` as a list."""
        shas = []
        for info in self.commit_info():
            shas.append(info['sha'])
        return shas

    @property
    def git_dir(self):
        """Control directory of the wrapped repo (``repo.controldir()``)."""
        control_dir = self.repo.controldir()
        return control_dir

    def auth(self, *args, **kwargs):
        """Build a ``GittleAuth`` from the arguments, store it as
        ``self.authenticator`` and return it."""
        authenticator = GittleAuth(*args, **kwargs)
        self.authenticator = authenticator
        return self.authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.active_branch
        refs_key = self._format_ref_branch(branch_name)
        sha = self.branches[branch_name]

        def wants_func(old):
            refs_key = self._format_ref_branch(branch_name)
            return {
                refs_key: sha
            }
        return wants_func

    def _get_ignore_regexes(self):
        gitignore_filename = os.path.join(self.path, '.gitignore')
        if not os.path.exists(gitignore_filename):
            return []
        lines = open(gitignore_filename).readlines()
        globers = map(lambda line: line.rstrip(), lines)
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        """Absolute filesystem path of a path given relative to the repo."""
        joined = os.path.join(self.path, repo_file)
        return os.path.abspath(joined)

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        """Path of ``abspath`` expressed relative to the repository path."""
        return os.path.relpath(abspath, start=self.path)

    @property
    def last_commit(self):
        """The object this instance returns for the current HEAD SHA."""
        head_sha = self.repo.head()
        return self[head_sha]

    @property
    def index(self):
        """Result of ``repo.open_index()``; the call is made on every access."""
        opened = self.repo.open_index()
        return opened

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository at ``path`` and wrap it.

        A truthy ``bare`` selects ``DulwichRepo.init_bare``, otherwise
        ``DulwichRepo.init`` is used. Remaining arguments go to the
        class constructor.
        """
        mkdir_safe(path)

        # Pick the dulwich constructor, build the repo, then wrap it
        constructor = DulwichRepo.init_bare if bare else DulwichRepo.init
        dulwich_repo = constructor(path)
        return cls(dulwich_repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        """Same as ``init`` with ``bare`` defaulting to True."""
        if 'bare' not in kwargs:
            kwargs['bare'] = True
        return cls.init(*args, **kwargs)

    @classmethod
    def is_repo(cls, path):
        """Returns True if path is a git repository, False if it is not

        Decided by constructing a ``Gittle`` for ``path`` and checking
        whether that raises ``NotGitRepository``.
        """
        try:
            Gittle(path)
        except NotGitRepository:
            return False
        return True

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for a remote.

        The URL falls back to ``self.origin_uri``; ``InvalidRemoteUrl`` is
        raised when neither is set. Client options are the authenticator's
        kwargs, overridden by ``kwargs``, with ``report_activity`` always
        set to this instance's method.
        """
        uri = origin_uri or self.origin_uri
        if not uri:
            raise InvalidRemoteUrl()

        # Later sources win: auth kwargs < caller kwargs < report_activity
        options = {}
        for source in (self.authenticator.kwargs(), kwargs):
            options.update(source)
        options['report_activity'] = self.report_activity

        client, remote_path = get_transport_and_path(uri, **options)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None):
        """Send the selected branch to ``origin_uri`` via the client's
        ``send_pack`` and return its result."""
        determine_wants = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri)
        pack_generator = self.repo.object_store.generate_pack_contents
        return client.send_pack(
            remote_path, determine_wants, pack_generator, progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None):
        """Delegate to ``push_to`` with the same three arguments."""
        outcome = self.push_to(origin_uri, branch_name, progress)
        return outcome

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Delete the whole repository directory, re-initialize it and
        pull from ``origin_uri``. Destructive: all existing data under
        ``self.path`` is removed first."""
        workdir = self.path

        # Wipe and recreate the directory, then start from an empty repo
        rmtree(workdir)
        mkdir_safe(workdir)
        self.repo = DulwichRepo.init(workdir)

        # Fetch brand new copy from remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        """Fetch from ``origin_uri``; ``branch_name`` is accepted but unused."""
        fetched = self.fetch(origin_uri)
        return fetched

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        """Delegate to ``pull_from`` with the same two arguments."""
        pulled = self.pull_from(origin_uri, branch_name)
        return pulled

    def fetch_remote(self, origin_uri=None):
        """Fetch from the remote into the wrapped repo and return whatever
        the client's ``fetch`` returns (the remote refs)."""
        client, remote_path = self.get_client(origin_uri=origin_uri)
        return client.fetch(remote_path, self.repo)


    def _setup_fetched_refs(self, refs, origin, bare):
        """Import fetched branches and tags, then store the cleaned refs.

        Heads are imported under ``refs/remotes/<origin>``, or under
        ``refs/heads`` when ``bare`` is truthy; tags go under
        ``refs/tags``. Finally every cleaned ref is assigned on ``self``.
        """
        git_utils = utils.git

        # Split the fetched refs by kind, then filter each group
        tag_refs = git_utils.subrefs(refs, 'refs/tags')
        head_refs = git_utils.subrefs(refs, 'refs/heads')
        tag_refs = git_utils.clean_refs(tag_refs)
        head_refs = git_utils.clean_refs(head_refs)

        # Base of new refs
        remote_base = 'refs/remotes/' + origin
        heads_base = 'refs/heads' if bare else remote_base

        # Import branches, then tags
        self.import_refs(heads_base, head_refs)
        self.import_refs('refs/tags', tag_refs)

        # Update HEAD
        for ref_name, sha in git_utils.clean_refs(refs).items():
            self[ref_name] = sha


    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote and update the local refs.

        Nothing more happens when the remote returns no refs (empty
        repository). Otherwise the refs are set up and the working
        directory is checked out, unless ``bare`` is truthy or there are
        no commits, in which case the server info is updated instead.
        """
        is_bare = bare or False
        remote_name = origin or self.DEFAULT_REMOTE

        refs = self.fetch_remote(origin_uri)
        if refs:
            # Update refs (branches, tags, HEAD)
            self._setup_fetched_refs(refs, remote_name, is_bare)

            if is_bare or not self.has_commits:
                self.update_server_info()
            else:
                # Checkout working directories
                self.checkout_all()


    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository into ``local_path`` and return it.

        The local repository is initialized (bare when ``bare`` is
        truthy), fetched from ``origin_uri`` and given an ``origin``
        remote. ``mkdir`` is accepted but not consulted.
        """
        mkdir_safe(local_path)

        # Initialize the local repository
        initializer = cls.init_bare if bare else cls.init
        local_repo = initializer(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)
        repo.fetch(bare=bare)

        # Add origin
        repo.add_remote('origin', origin_uri)
        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as .clone except ``bare`` defaults to True."""
        if 'bare' not in kwargs:
            kwargs['bare'] = True
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):

        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.info("STAGING : %s" % modified_files)
            self.repo.stage(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message,
            author=author_msg,
            committer=committer_msg,
            encoding='UTF-8',
            tree=tree,
            *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat list of files as blobs plus one tree; return the
        tree id.

        Each item of ``structure`` is a mapping with ``data``, ``name``
        and ``mode``. Items whose fields cannot be read or ASCII-encoded
        are skipped.
        """
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:

            # str only
            try:
                data = file_info['data'].encode('ascii')
                name = file_info['name'].encode('ascii')
                mode = file_info['mode']
            except Exception:
                # Skip file on encoding errors (still best-effort, but no
                # longer swallows KeyboardInterrupt/SystemExit)
                continue

            blob = Blob()
            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(
                name,
                mode,
                blob.id
            )

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    # Like: git commmit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        """Commit ``files`` using one identity as both committer and author.

        Extra arguments are passed through to ``_commit``.
        """
        identity = dict(name=name, email=email)
        return self._commit(
            committer=identity,
            author=identity,
            message=message,
            files=files,
            *args,
            **kwargs
        )

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Commit a tree built from ``structure`` instead of staged files.

        Main use is to do commits directly to bare repositories
        For example doing a first Initial Commit so the repo can be cloned and worked on right away

        Returns None without committing when ``structure`` is empty.
        """
        if structure:
            tree_id = self._tree_from_structure(structure)
            identity = dict(name=name, email=email)
            return self._commit(
                committer=identity,
                author=identity,
                message=message,
                tree=tree_id,
                *args,
                **kwargs
            )
        return

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        """Push to the remote, then pull from it; returns the pull result."""
        self.push(origin_uri)
        pulled = self.pull(origin_uri)
        return pulled

    def lookup_entry(self, relpath, trackable_files=set()):
        """Return ``(blob sha hexdigest, st_mode)`` for a working-tree file.

        :param relpath: path relative to the repository.
        :param trackable_files: collection of paths that may be looked up;
                                the shared default is only read, never
                                mutated.
        :raises KeyError: if ``relpath`` is not in ``trackable_files``.
        """
        if relpath not in trackable_files:
            raise KeyError

        abspath = self.abspath(relpath)

        with open(abspath, 'rb') as git_file:
            data = git_file.read()
        s = sha1()
        # The header must be bytes like the file data; a str header makes
        # sha1.update() fail on Python 3
        s.update(("blob %u\0" % len(data)).encode('ascii'))
        s.update(data)
        return (s.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        """Every path yielded by iterating the index."""
        index = self.index
        return [path for path in index]

    @property
    @funky.transform(set)
    def raw_files(self):
        """Result of ``utils.paths.subpaths`` for the repository path,
        with no filters applied."""
        root = self.path
        return utils.paths.subpaths(root)

    @property
    @funky.transform(set)
    def ignored_files(self):
        """Result of ``utils.paths.subpaths`` for the repository path with
        this instance's filters applied."""
        active_filters = self.filters
        return utils.paths.subpaths(self.path, filters=active_filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        """``raw_files`` minus ``ignored_files``."""
        everything = self.raw_files
        return everything - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        """``trackable_files`` minus ``tracked_files``."""
        candidates = self.trackable_files
        return candidates - self.tracked_files

    """
    @property
    @funky.transform(set)
    def modified_staged_files(self):
        "Checks if the file has changed since last commit"
        timestamp = self.last_commit.commit_time
        index = self.index
        return [
            f
            for f in self.tracked_files
            if index[f][1][0] > timestamp
        ]
    """

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        obj_sto = self.repo.object_store
        tree_id = self[ref].tree
        names = self.trackable_files

        lookup_func = partial(self.lookup_entry, trackable_files=names)

        # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False)
        return list(tree_diff)

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        """Collect the paths of changed entries whose (old name, new name)
        presence pair equals ``pattern``.

        For ``PATTERN_MODIFIED`` an entry is additionally kept only when
        its two SHAs compare equal.
        """
        # NOTE(review): for "modified" one would expect the SHAs to differ;
        # the equality test is reproduced as-is. Confirm the intent.
        check_sha = (self.PATTERN_MODIFIED == pattern)

        selected = []
        for names, modes, sha in self._changed_entries():
            if tuple(map(bool, names)) != pattern:
                continue
            if not funky.first_true(names):
                continue
            if check_sha and not (sha[0] == sha[1]):
                continue
            selected.append(funky.first_true(names))
        return selected

    @property
    @funky.transform(set)
    def removed_files(self):
        """Entries matching ``PATTERN_REMOVED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_REMOVED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        """Entries matching ``PATTERN_ADDED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_ADDED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        """Entries matching ``PATTERN_MODIFIED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_MODIFIED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        """Tracked files whose mtime on disk is later than the commit time
        of the last commit."""
        cutoff = self.last_commit.commit_time
        newer = []
        for tracked in self.tracked_files:
            if os.stat(self.abspath(tracked)).st_mtime > cutoff:
                newer.append(tracked)
        return newer

    @property
    def pending_files(self):
        """
        Union of the modified, added and removed files, i.e. everything
        that could possibly be staged
        """
        changed = self.modified_files | self.added_files
        return changed | self.removed_files

    @property
    def pending_files_by_state(self):
        """Map every pending path to its state: 'modified', 'added' or 'removed'."""
        by_state = {
            'modified': self.modified_files,
            'added': self.added_files,
            'removed': self.removed_files
        }

        # Invert the mapping: state -> paths becomes path -> state
        flipped = {}
        for state, paths in by_state.items():
            for path in paths:
                flipped[path] = state
        return flipped

    """
    @property
    @funky.transform(set)
    def modified_files(self):
        return self.modified_staged_files | self.modified_unstaged_files
    """

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        """Stage ``files`` by delegating to the underlying repo's ``stage``."""
        repo = self.repo
        return repo.stage(files)

    def add(self, *args, **kwargs):
        """Alias of ``stage``: forwards every argument unchanged."""
        staged = self.stage(*args, **kwargs)
        return staged

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Remove ``files`` from the index and write the index back.

        Files that are not present in the index are silently skipped.
        ``force`` is accepted for interface compatibility but is unused.

        Returns whatever the index's ``write()`` returns.
        """
        # Work on ONE index object.  The ``index`` property rebuilds the index
        # on every access, so deleting through ``self.index[...]`` would edit
        # a throw-away copy while ``index.write()`` persisted the untouched one.
        index = self.index
        # Materialise the selection first so deletion never races the filter
        for f in [f for f in files if f in index]:
            del index[f]
        return index.write()

    def mv_fs(self, file_pair):
        """Rename a file on disk; ``file_pair`` is ``(old_name, new_name)``."""
        source, destination = file_pair
        os.rename(source, destination)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Rename tracked files on disk and update the index accordingly.

        ``files_pair`` is an iterable of ``(old_name, new_name)`` pairs; pairs
        whose old name is not in the index are ignored.  Returns ``None``.
        """
        index = self.index
        # Build a real list: it is traversed three times below, and a lazy
        # filter()/map() (Python 3) would be exhausted after the first pass
        # -- or, for the rename step, never be evaluated at all.
        tracked_pairs = [pair for pair in files_pair if pair[0] in index]

        for pair in tracked_pairs:
            self.mv_fs(pair)

        old_files = [funky.first(pair) for pair in tracked_pairs]
        new_files = [funky.last(pair) for pair in tracked_pairs]

        self.add(new_files)
        self.rm(old_files)
        # NOTE(review): the old names are staged again after removal --
        # presumably so the deletion is recorded by stage(); confirm.
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        """Rebuild the index from ``tree`` via ``build_index_from_tree``.

        Passes the repo path, index path and object store of the wrapped repo
        together with ``tree`` and returns the helper's result.
        """
        repo = self.repo
        root = repo.path
        index_file = repo.index_path()
        return build_index_from_tree(root, index_file, repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        """Check out the tree of ``commit_sha`` (``self.head`` when omitted)."""
        target = commit_sha or self.head
        # The index is rebuilt from the tree of the selected commit
        return self._checkout_tree(self._commit_tree(target))

    def checkout(self, ref):
        """Point HEAD (symbolically) at ``ref`` and check out its tree.
        """
        refs = self.repo.refs
        refs.set_symbolic_ref('HEAD', ref)
        # The index is rebuilt from the tree behind ``ref``
        tree = self._commit_tree(ref)
        return self._checkout_tree(tree)

    @funky.arglist_method
    def reset(self, files, commit='HEAD'):
        """Placeholder for a ``git reset`` equivalent -- not implemented.

        The body is empty: ``files`` and ``commit`` are accepted but ignored.
        """
        pass

    def rm_all(self):
        """Empty the index and write it back; returns the result of ``write()``."""
        # The ``index`` property builds a new object on each access, so one
        # reference is held here: clear() and write() must hit the same index.
        index = self.index
        index.clear()
        return index.write()

    def _to_commit(self, commit_obj):
        """Allows methods to accept both SHA's or dulwich Commit objects as arguments

        A string (text or bytes) is looked up in ``self.repo``; anything else
        is returned unchanged.
        """
        # ``basestring`` only exists on Python 2; fall back to the Python 3
        # string types so the check does not raise NameError there.
        try:
            string_types = basestring
        except NameError:
            string_types = (str, bytes)

        if isinstance(commit_obj, string_types):
            return self.repo[commit_obj]
        return commit_obj

    def _commit_sha(self, commit_obj):
        """Extracts a Dulwich commits SHA

        Accepts a SHA (returned as is), a reference name (resolved through
        ``dwim_reference`` and looked up in the repo) or an object exposing
        an ``id`` attribute.
        """
        # ``basestring`` only exists on Python 2; fall back to the Python 3
        # string types so the check does not raise NameError there.
        try:
            string_types = basestring
        except NameError:
            string_types = (str, bytes)

        if utils.git.is_sha(commit_obj):
            return commit_obj
        elif isinstance(commit_obj, string_types):
            # Can't use self[commit_obj] to avoid infinite recursion
            commit_obj = self.repo[self.dwim_reference(commit_obj)]
        return commit_obj.id

    def dwim_reference(self, ref):
        """Dwim resolves a short reference to a full reference

        Tries ``ref`` verbatim and then under the usual ``refs/...``
        namespaces, returning the first candidate contained in ``self.repo``.

        Raises ``Exception("Could not resolve ref")`` when nothing matches.
        """

        # Formats of refs we want to try in order
        formats = [
            "%s",
            "refs/%s",
            "refs/tags/%s",
            "refs/heads/%s",
            "refs/remotes/%s",
            "refs/remotes/%s/HEAD",
        ]

        for fmt in formats:
            try:
                fullref = fmt % ref
                if fullref in self.repo:
                    return fullref
            except Exception:
                # A candidate that cannot be built or looked up is skipped.
                # Only ``Exception`` is caught so KeyboardInterrupt and
                # SystemExit still propagate.
                continue

        raise Exception("Could not resolve ref")

    def blob_data(self, sha):
        """Look up ``sha`` on this object and return the ``data`` of the result
        """
        blob = self[sha]
        return blob.data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """ Follow the first-parent chain ``n`` steps back (default 1)
            and return the SHA reached; stops early at a commit without parents.
            Warning: Remember that parents aren't the previous commits
        """
        remaining = 1 if n is None else n
        current = commit

        while True:
            current = self._to_commit(current)
            ancestors = current.parents

            if remaining <= 0 or not ancestors:
                # Done walking: hand back a SHA
                return self._commit_sha(current)

            # Step onto the first parent only
            current = self[ancestors[0]]
            remaining -= 1

    def get_previous_commit(self, commit_ref, n=None):
        """Resolve ``commit_ref`` and look ``n`` entries (default 1) past it in
        ``self.commits()`` using ``funky.next``; the resolved SHA itself is the
        fallback value passed as ``default``.
        """
        start_sha = self._parse_reference(commit_ref)
        steps = n or 1
        history = self.commits()
        return funky.next(history, start_sha, n=steps, default=start_sha)

    def _parse_reference(self, ref_string):
        """Resolve a reference string, with optional ``~N`` suffix, to a SHA.

        ``REF`` is resolved through ``_commit_sha``.  ``REF~N`` is resolved to
        ``get_previous_commit(<sha of REF>, N)``.  A bare ``REF~`` means
        ``REF~1`` and suffixes may be chained (``REF~1~2``), as in git.

        Raises ``ValueError`` when the count after ``~`` is not an integer.
        """
        if '~' not in ref_string:
            return self._commit_sha(ref_string)

        # COMMIT_REF~x -- split on the LAST '~' so chained suffixes are
        # peeled off one at a time by the recursive call below.
        ref, _, count = ref_string.rpartition('~')
        steps = int(count) if count else 1
        base_sha = self._parse_reference(ref)
        return self.get_previous_commit(base_sha, steps)

    def _commit_tree(self, commit_sha):
        """Look up ``commit_sha`` on this object and return its ``tree`` attribute
        """
        commit = self[commit_sha]
        return commit.tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff ``commit_sha`` against ``compare_to``.

        ``compare_to`` defaults to ``get_previous_commit(commit_sha)``.
        ``diff_type`` selects an entry of ``DIFF_FUNCTIONS`` (default
        ``DEFAULT_DIFF_TYPE``).  ``filter_binary`` is forwarded to the diff
        function.

        Raises ``KeyError`` for an unknown ``diff_type``.
        """
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary: without it the caller's choice was silently
        # replaced by _diff_between's own default.
        return self._diff_between(
            compare_to,
            commit_sha,
            diff_function=diff_func,
            filter_binary=filter_binary
        )

    def diff_working(self, ref=None, filter_binary=True):
        """Diff the changed entries relative to ``ref`` using
        ``utils.git.diff_changes_paths`` (working directory against the commit)
        """
        changed = self._changed_entries(ref=ref)
        store = self.repo.object_store
        return utils.git.diff_changes_paths(
            store, self.path, changed, filter_binary=filter_binary)

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Describe the files of a commit, recursing into sub-trees.

        ``commit_sha`` is a commit, or a tree when ``is_tree`` is true.
        ``parent_path`` is prepended to every entry path.  When ``paths`` is
        given, only files whose full path is in it are reported.

        Returns a dict keyed by path:
            {
                "directory/filename.txt": {
                    'name': 'filename.txt',
                    'path': "directory/filename.txt",
                    'sha': "xxxxxxxxxxxxxxxxxxxx",
                    'data': "blablabla",
                    'mode': <entry mode>,
                },
                ...
            }
        """
        prefix = parent_path or ''
        root_sha = commit_sha if is_tree else self._commit_tree(commit_sha)
        listing = {}

        for entry in self[root_sha].items():
            full_path = os.path.join(prefix, entry.path)

            if entry.mode == self.MODE_DIRECTORY:
                # Sub-tree: merge everything found below it
                listing.update(
                    self.get_commit_files(entry.sha, parent_path=full_path, is_tree=True, paths=paths)
                )
            elif paths is None or full_path in paths:
                # Regular entry that passed the path filter
                listing[full_path] = {
                    'name': entry.path,
                    'path': full_path,
                    'mode': entry.mode,
                    'sha': entry.sha,
                    'data': self.blob_data(entry.sha),
                }
        return listing

    def file_versions(self, path):
        """Return the distinct versions of ``path`` found across ``commit_info()``.

        Commits are visited in the order ``commit_info()`` yields them.  For
        each commit containing ``path`` the file-info dict from
        ``get_commit_files`` is taken; a version is reported once per blob
        SHA (first occurrence wins).

        Returns a list of file-info dicts (not commits).  As a side effect
        each reported commit dict gets a ``'file'`` key holding that info.
        """
        versions = []
        commits_info = self.commit_info()
        seen_shas = set()

        for commit in commits_info:
            try:
                files = self.get_commit_files(commit['sha'], paths=[path])
                # list(...) keeps this working on Python 3, where dict views
                # cannot be indexed; an empty result raises IndexError.
                file_data = list(files.values())[0]
            except IndexError:
                # File not present in this commit
                continue

            file_sha = file_data['sha']
            if file_sha in seen_shas:
                continue
            seen_shas.add(file_sha)

            # Add file info
            commit['file'] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Low-level diff of two commits' trees through ``diff_function``.
            Prefer the public .diff method unless you have very specific needs
        """
        # Identical SHAs mark the first commit: its "old" side is an empty tree
        is_first_commit = new_commit_sha == old_commit_sha
        old_tree = Tree() if is_first_commit else self._commit_tree(old_commit_sha)
        new_tree = self._commit_tree(new_commit_sha)

        store = self.repo.object_store
        return diff_function(store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """ Run ``diff`` with ``diff_type`` forced to 'changes'
            Documented result format, a list of lists of tuples :
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        options = dict(kwargs, diff_type='changes')
        return self.diff(*args, **options)

    def changes_count(self, *args, **kwargs):
        """Number of entries returned by ``changes`` for the same arguments."""
        found = self.changes(*args, **kwargs)
        return len(found)

    def _refs_by_pattern(self, pattern):
        """Return the refs whose name starts with ``pattern``, keyed by the
        name with that prefix stripped.
        """
        prefix_length = len(pattern)
        return {
            name[prefix_length:]: target
            for name, target in self.refs.items()
            if name.startswith(pattern)
        }

    @property
    def refs(self):
        """All refs as returned by the wrapped repo's ``get_refs()``."""
        repo = self.repo
        return repo.get_refs()

    def set_refs(self, refs_dict):
        """Assign every ``name -> value`` pair of ``refs_dict`` on the repo.

        Each pair is written with ``self.repo[name] = value``.  (The method
        previously lacked ``self``, so it could not be called on an instance.)
        """
        for k, v in refs_dict.items():
            self.repo[k] = v

    def import_refs(self, base, other):
        """Delegate to the ref container's ``import_refs(base, other)``."""
        refs = self.repo.refs
        return refs.import_refs(base, other)

    @property
    def branches(self):
        """Refs under ``REFS_BRANCHES``, keyed by name without that prefix."""
        prefix = self.REFS_BRANCHES
        return self._refs_by_pattern(prefix)

    @property
    def active_branch(self):
        """Name of the branch HEAD points at, or None when HEAD is not a
        symbolic ref or points outside ``REFS_BRANCHES``
        """
        head = self.repo.refs.read_ref('HEAD')
        if not head.startswith(SYMREF):
            # HEAD holds a plain value: detached
            return None

        target = head[len(SYMREF):]
        prefix = self.REFS_BRANCHES
        if not target.startswith(prefix):
            return None
        return target[len(prefix):]

    @property
    def active_sha(self):
        """Deprecated alias: returns the same value as the ``head`` property
        """
        current = self.head
        return current

    @property
    def remote_branches(self):
        """Refs under ``REFS_REMOTES``, keyed by name without that prefix."""
        prefix = self.REFS_REMOTES
        return self._refs_by_pattern(prefix)

    @property
    def tags(self):
        """Refs under ``REFS_TAGS``, keyed by name without that prefix."""
        prefix = self.REFS_TAGS
        return self._refs_by_pattern(prefix)

    @property
    def remotes(self):
        """ Remote name -> URL, read from the repo config
        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        """
        urls = {}
        for section, values in self.repo.get_config().items():
            # Only ('remote', <name>) sections describe remotes
            if section[0] != 'remote':
                continue
            urls[section[1]] = values['url']
        return urls

    def add_remote(self, remote_name, remote_url):
        """Record a remote's ``url`` and ``fetch`` refspec in the repo config,
        write the config to disk and return ``remote_name``.
        """
        config = self.repo.get_config()
        section = ('remote', remote_name)
        fetch_spec = "+refs/heads/*:refs/remotes/%s/*" % remote_name

        config.set(section, 'url', remote_url)
        config.set(section, 'fetch', fetch_spec)

        # Persist the modified config
        config.write_to_path()

        return remote_name

    def add_ref(self, new_ref, old_ref):
        """Set ``new_ref`` to ``old_ref`` in the repo's refs, then call
        ``update_server_info``.
        """
        refs = self.repo.refs
        refs[new_ref] = old_ref
        self.update_server_info()

    def remove_ref(self, ref_name):
        """Delete ``ref_name`` from the repo's refs.

        Returns False (and does nothing) when the ref does not exist,
        otherwise True after calling ``update_server_info``.
        """
        refs = self.repo.refs
        if ref_name not in refs:
            return False
        del refs[ref_name]
        self.update_server_info()
        return True

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Create ``new_branch`` as a copy of ``base_branch``.

        The base is looked up among the local branches first and, failing
        that, as ``<tracking>/<base_branch>`` among the remote branches.
        ``tracking`` names the remote to look in and defaults to
        ``DEFAULT_REMOTE``.

        Returns the full ref name of the new branch.
        Raises ``Exception`` if ``new_branch`` already exists or if the base
        branch can be found neither locally nor on the remote.
        """

        # The remote to track (honour the caller's choice; it used to be
        # overwritten unconditionally)
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Ref names always use '/', whatever the platform's path separator is
        remote_branch = '/'.join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking))

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def create_orphan_branch(self, new_branch, empty_index=None):
        """ Point HEAD at a branch that does not exist yet.
        No ref is written for the branch itself; per the original notes it
        only comes into existence once something is committed.  Equivalent to:

            git checkout --orphan <new_branch>,

        With empty_index set to True the index is emptied as well (the
        working files are cleaned in either case).  Against a clean working
        tree, this is equivalent to:

            git checkout --orphan <new_branch>
            git reset --merge

        Returns the full ref name; raises Exception if the branch exists.
        """
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        new_ref = self._format_ref_branch(new_branch)
        self.repo.refs.set_symbolic_ref('HEAD', new_ref)

        # Nothing more to do without a working tree
        if not self.is_working:
            return new_ref

        if empty_index:
            self.rm_all()
        self.clean_working()
        return new_ref

    def remove_branch(self, branch_name):
        """Remove the ref of ``branch_name``; returns ``remove_ref``'s result."""
        return self.remove_ref(self._format_ref_branch(branch_name))

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Changes the current branch

        If ``branch_name`` does not exist it is created from the branch of
        the same name (see ``create_branch``; ``tracking`` is passed on),
        unless ``create`` is explicitly false, in which case an ``Exception``
        is raised.  ``create`` defaults to True.

        HEAD is then pointed at the branch and, for a working repository,
        the working files are cleaned and checked out again.
        """
        if create is None:
            create = True

        # Check if branch exists
        if branch_name not in self.branches:
            # The flag used to be ignored and the branch was always created
            if not create:
                raise Exception("branch %s does not exist" % branch_name)
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref('HEAD', branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def create_tag(self, tag_name, target):
        """Create the tag ref ``tag_name`` pointing at the SHA ``target`` resolves to."""
        tag_ref = self._format_ref_tag(tag_name)
        target_sha = self._parse_reference(target)
        return self.add_ref(tag_ref, target_sha)

    def remove_tag(self, tag_name):
        """Remove the ref of tag ``tag_name``; returns ``remove_ref``'s result."""
        return self.remove_ref(self._format_ref_tag(tag_name))

    def clean(self, force=None, directories=None):
        """Delete every untracked file from disk and return their paths.

        ``force`` and ``directories`` are accepted but currently unused.
        Errors raised by ``os.remove`` propagate to the caller.
        """
        untracked_files = self.untracked_files
        # Explicit loop: ``map(os.remove, ...)`` is lazy on Python 3 and
        # would never delete anything.
        for path in untracked_files:
            os.remove(path)
        return untracked_files

    def clean_working(self):
        """Clean the working directory by delegating to ``clean()``.
            Used around branch switching / checkout to start from a clean state
        """
        removed = self.clean()
        return removed

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        """Build a nested dict describing the tree ``tree_sha``.

        Directories map to a nested structure (recursing with ``depth - 1``),
        other entries map to their SHA.  '.' holds the tree's own SHA and
        '..' the parent's (or its own when there is no parent).  ``depth``
        defaults to ``MAX_TREE_DEPTH``; a depth of 0 yields an empty dict.
        """
        tree = self[tree_sha]
        listing = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return listing

        for entry in tree.items():
            if entry.mode == self.MODE_DIRECTORY:
                # Sub-directory: describe it one level deeper
                value = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha)
            else:
                value = entry.sha
            listing[entry.path] = value

        listing['.'] = tree_sha
        listing['..'] = parent_sha or tree_sha
        return listing

    def _get_fs_structure_by_path(self, tree_sha, path):
        """Build the structure of ``tree_sha`` deep enough to cover ``path``
        and return the part selected by ``funky.subkey`` for its components.
        """
        components = path.split(os.path.sep)
        # One level more than the number of path components
        structure = self._get_fs_structure(tree_sha, depth=len(components) + 1)
        return funky.subkey(structure, components)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" of the commit ``ref`` from that commit's tree.
           An empty subpath or one of ROOT_PATHS lists the root (depth 1)
        """
        tree_sha = self._commit_tree(ref)

        is_root = subpath in self.ROOT_PATHS or not subpath
        if not is_root:
            return self._get_fs_structure_by_path(tree_sha, subpath)
        return self._get_fs_structure(tree_sha, depth=1)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit

        The info dict comes from ``get_commit_files(ref, paths=[path])``.
        Raises ``IndexError`` when the commit does not contain ``path``.
        """
        matches = self.get_commit_files(ref, paths=[path])
        # list(...) keeps this working on Python 3, where dict views cannot
        # be indexed
        return list(matches.values())[0]

    def commit_tree(self, ref, *args, **kwargs):
        """Structure of the tree of commit ``ref``; extra arguments are
        forwarded to ``_get_fs_structure``.
        """
        return self._get_fs_structure(self._commit_tree(ref), *args, **kwargs)

    def update_server_info(self):
        """Call the module-level ``update_server_info`` on the wrapped repo,
        but only for bare repositories.  Always returns ``None``.
        """
        if self.is_bare:
            update_server_info(self.repo)

    def _is_fast_forward(self):
        """Placeholder -- not implemented; the body is empty and returns ``None``."""
        pass

    def _merge_fast_forward(self):
        """Placeholder -- not implemented; the body is empty and returns ``None``."""
        pass

    def __hash__(self):
        """Hash instances by their ``path`` (needed so the memoize function
        can use them as keys)
        """
        location = self.path
        return hash(location)

    def __getitem__(self, key):
        """Resolve ``key`` (SHA or reference, ``~N`` allowed) and return the
        matching object from the wrapped repo.

        Raises ``KeyError(key)`` when the reference cannot be resolved.
        """
        try:
            sha = self._parse_reference(key)
        except Exception:
            # Only ordinary errors mean "unknown key"; KeyboardInterrupt and
            # SystemExit must not be converted into KeyError.
            raise KeyError(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        """Store ``value`` in the wrapped repo under ``key``.

        ``key`` is expanded with ``dwim_reference`` when possible; a key that
        cannot be resolved is used verbatim.
        """
        try:
            key = self.dwim_reference(key)
        except Exception:
            # Best effort: fall back to the key as given.  KeyboardInterrupt
            # and SystemExit are deliberately not swallowed.
            pass
        self.repo[key] = value

    def __contains__(self, key):
        """Tell whether ``key`` is in the wrapped repo.

        ``key`` is expanded with ``dwim_reference`` when possible; a key that
        cannot be resolved is tested verbatim.
        """
        try:
            key = self.dwim_reference(key)
        except Exception:
            # Best effort: fall back to the key as given.  KeyboardInterrupt
            # and SystemExit are deliberately not swallowed.
            pass
        return key in self.repo

    def __delitem__(self, key):
        """Remove the reference ``key`` resolves to (via ``remove_ref``).

        Raises ``KeyError(key)`` when ``key`` cannot be resolved.
        """
        try:
            key = self.dwim_reference(key)
        except Exception:
            # Only ordinary errors mean "unknown key"; KeyboardInterrupt and
            # SystemExit must not be converted into KeyError.
            raise KeyError(key)
        self.remove_ref(key)


    # Class-level aliases: each name on the left is bound to the same object
    # as the existing attribute on the right.
    # fork -> clone_bare
    fork = clone_bare
    # log -> commit_info
    log = commit_info
    # diff_count -> changes_count
    diff_count = changes_count
    # contributors -> recent_contributors
    contributors = recent_contributors
예제 #42
0
class Game(object):
    """A versioned game.

    The board is serialised as ``board.json`` and every change is committed
    to a bare git repository located at ``self.data``; branches of that
    repository are alternative lines of play.
    """

    def __init__(self, name=None, **options):
        """Open the game ``name`` (a random hex name when omitted).

        ``options`` override ``DEFAULTS``.  The ``data`` option is a path
        template formatted with the game name; the ``create`` option decides
        whether a missing game is created or reported with ``GameError``.
        """
        self.name = name or uuid.uuid4().hex
        self.options = dict(DEFAULTS, **options)
        self.data = self.options.pop('data').format(name=self.name)
        new = False

        self.repo = None
        if not os.path.exists(self.data):
            if not self.options['create']:
                raise GameError("Game does not exist")
            os.makedirs(self.data)

        try:
            self.repo = Repo(self.data)
        except dulwich.errors.NotGitRepository:
            if not self.options['create']:
                raise GameError("Game does not exist")
            self.repo = Repo.init_bare(self.data)
            new = True

        # A real conditional instead of ``new and X or Y``: that idiom falls
        # through to get_board() whenever the fresh BoardState() is falsy.
        self.board = BoardState() if new else self.get_board()

        if new:
            self.save("New blank board for game: %s" % self.name)

    @property
    def branch(self):
        """Short name of the branch HEAD points at ('master' as fallback)."""
        head = self.repo.refs.read_ref('HEAD')
        if head and head.startswith('ref: '):
            head = head.split(': ')[-1]
            head = head.replace('refs/heads/', '')
            return head
        return 'master'

    def _tree(self, branch=None):
        """Tree of the tip of ``branch``; a new empty ``Tree`` if unknown."""
        branch = branch or self.branch
        try:
            commit = self.repo['refs/heads/%s' % branch]
            return self.repo[commit.tree]
        except KeyError:
            return Tree()

    def signature(self, of=None):
        """Value of the ref for branch ``of`` (HEAD when omitted), or None."""
        ref_name = ("refs/heads/%s" % of) if of else "HEAD"
        try:
            return self.repo.refs[ref_name]
        except KeyError:
            return None

    def get_board(self, branch=None):
        """Load the board stored as ``board.json`` on ``branch``.

        Raises ``GameError`` for an unknown branch and ``IndexError`` when
        the branch's tree holds no ``board.json``.
        """
        branch = branch or self.branch
        if branch not in self.branches():
            raise GameError("Unknown branch")
        # Tree entries are (mode, name, sha) tuples
        board_shas = [entry[2]
                      for entry in self._tree(branch).entries()
                      if entry[1] == 'board.json']
        return BoardState.from_json(self.repo[board_shas.pop()].data)

    def set_branch(self, new):
        """Point HEAD at branch ``new``; returns its name, or False if unknown."""
        ref_name = 'refs/heads/%s' % new
        if ref_name in self.repo.get_refs():
            self.repo.refs.set_symbolic_ref('HEAD', ref_name)
            return self.branch
        return False

    def branches(self):
        """Sorted ref names (HEAD excluded) with 'refs/heads/' stripped."""
        return sorted([name.replace('refs/heads/', '')
                       for (name, sig) in self.repo.get_refs().items()
                       if name != "HEAD"])

    def make_branch(self, name, back=0):
        """Create branch ``name`` at the commit ``back`` steps into history.

        Raises ``GameError`` if the branch exists or ``back`` goes past the
        start of the history.
        """
        if ('refs/heads/%s' % name) in self.repo.get_refs():
            raise GameError("I already have this branch")
        try:
            head = self.repo.head()
            history = self.repo.revision_history(head)
            self.repo.refs['refs/heads/%s' % name] = history[back].id
        except IndexError:
            raise GameError("Trying to go {back} which is further than history".format(back=back))
        return True

    def save(self, message="Forced commit"):
        """Commit the current board as ``board.json`` with ``message``."""
        blob = Blob.from_string(self.board.as_json())
        tree = self._tree()
        # 0o100644 (regular file) replaces the Python-2-only literal 0100644.
        # NOTE(review): argument order (mode, name, sha) is kept from the
        # original; confirm it matches the installed dulwich's Tree.add.
        tree.add(0o100644, 'board.json', blob.id)

        for obj in (blob, tree):
            self.repo.object_store.add_object(obj)

        self.repo.do_commit(message, committer="Game %s" % self.name, tree=tree.id)

    def move(self, x, y):
        """Play at (x, y); returns the player who moved, or None if rejected."""
        player = self.board.player_turn()
        if not self.board.game_over and self.board.move(x, y):
            self.save("{player} moved to ({x}, {y})".format(player=player,
                                                            x=x,
                                                            y=y))
            return player
        return None

    def skip(self):
        """Pass the turn (unless the game is over) and commit the result.

        Returns the truthy ``game_over`` value, else whose turn it is now.
        """
        player = self.board.player_turn()
        if not self.board.game_over:
            self.board.move(None)
            is_or_isnt = "is" if self.board.game_over else "is NOT"
            self.save("{player} skipped, game {maybe} over".format(player=player,
                                                                   maybe=is_or_isnt))
        return self.board.game_over or self.board.player_turn()

    def who(self):
        """Player whose turn it is."""
        return self.board.player_turn()

    def scores(self):
        """Current scores as reported by the board."""
        return self.board.scores()

    def winner(self):
        """The board's ``winner`` attribute."""
        return self.board.winner

    def __unicode__(self):
        return "Game: {name} {black} vs {white} on {x}x{y} from {data} :: {board}".format(
                 name=self.name,
                 board=self.board,
                 data=self.data,
                 **self.options
               )
    __str__=__unicode__

    def __repr__(self):
        return "<%s>" % self
예제 #43
0
파일: backend.py 프로젝트: warrd/git-papers
class GitPapersApp(object):
    """A git-backed paper database rooted at a repository path.

    Papers are listed in a ``.index`` file in the repository (one
    ``key:timestamp:path:ext:reftype`` line per paper); the instance can be
    queried like a read-only mapping from key to ``Paper``.
    """

    def __init__(self, path='.'):
        """Open the repository at ``path``.

        Raises ``RepoNotInitialised`` if ``path`` is not a git repository.
        """

        try:
            self.repo = Repo(path)

        except NotGitRepository:
            raise RepoNotInitialised()

        self._db = None
        self._root = None

    def commit(self, paths, commit_message):

        """Stage the given paths and commit."""

        rpath = self.repo.path
        prefix = rpath + '/'

        # Must be relative to the repo
        def clean_path(path):
            # startswith(prefix) also copes with path == rpath, where
            # indexing path[len(rpath)] used to raise IndexError
            if path.startswith(prefix):
                return path[len(prefix):]
            return path

        # A list, not a lazy map object, is handed to stage()
        paths = [clean_path(path) for path in paths]
        self.repo.stage(paths)

        self.repo.do_commit(
            message=str(commit_message),
            committer=COMMIT_AUTHOR,
        )

    def history(self):

        """Generate individual papers, starting from most recent."""

        # Generate from index not commit history

        for key in self.keymap:
            yield Paper.load(self.repo.path, self.keymap[key])

    #
    # Key related functions/properties
    #

    @property
    def keymap(self):
        """Ordered mapping key -> MetaData, newest timestamp first.

        Parsed lazily from ``.index`` on first access and cached.
        Raises ``InvalidIndex`` on malformed lines or duplicate keys.
        """
        # Load keymap lazily
        if not hasattr(self, '_keymap'):

            MetaData = namedtuple('MetaData', 'timestamp, path, ext, reftype')
            kmap = {}

            with open(osp.join(self.repo.path, '.index')) as f:
                for line in f:
                    # Lines read from a file keep their newline, so a blank
                    # line is never falsy; strip before testing
                    if not line.strip():
                        continue
                    key, ts, path, ext, reftype = self._parse_idx(line)
                    if key in kmap:
                        raise InvalidIndex('Duplicate key: %s' %key)
                    # Compare by value: ``is not`` on a str literal tests
                    # identity and only works by accident of interning
                    kmap[key] = MetaData(ts, path,
                            ext if ext != 'nofile' else None, reftype)

            self._keymap = OrderedDict(sorted(kmap.items(),
                                       key=lambda x:-float(x[1].timestamp)))

        return self._keymap

    def _parse_idx(self, item):

        """Create tuple from single line in the index."""

        if item.endswith('\n'):
            item = item[:-1]
        try:
            key, timestamp, path, ext, reftype =item.split(':')
        except ValueError:
            raise InvalidIndex('Malformed line: %s' %item)
        else:
            return (key, timestamp, path, ext, reftype)

    def __contains__(self, key):

        """Check whether a particular key exists in the database."""

        return key in self.keymap

    def __len__(self):

        """The number of papers in the database."""

        return len(self.keymap)

    def __getitem__(self, key):

        """Retrieve the paper with the given key."""

        key = self._matchkey(key)
        metadata = self.keymap[key]
        return Paper.load(self.repo.path, metadata)

    def __iter__(self):

        for key in self.keymap:
            yield self[key]

    def _matchkey(self, key):

        """Matches the key against an entry in the index. Partial keys and
        directory names are permitted.

        Raises ``MultipleKeyMatches`` or ``NonexistentKey`` when a partial
        key matches several entries or none."""

        if key.startswith('.'):
            pass
            # TODO: Could be directory
            #raise IllegalKey(key)

        if key not in self:

            # Try splitting the key to just base
            if '/' in key:
                if key.endswith('/'):
                    key = key[:-1]
                key, _ = osp.splitext(osp.basename(key))

            # Try matching partial keys
            if key not in self:
                matches = [k for k in self.keymap if k.startswith(key)]
                if len(matches)>1:
                    raise MultipleKeyMatches(key, matches)
                if not matches:
                    raise NonexistentKey(key)
                key = matches[0]

        return key

    #
    # Commands
    #

    @classmethod
    def init(cls, path='.'):

        """Initialise a new git-papers repo at the given path and commit the
        basic directory structure.

        Raises ``RepoInitialised`` if a repository already exists there and
        ``FileCreationFailed`` if one of the initial files cannot be created
        (the fresh ``.git`` directory is removed in that case)."""

        # TODO: do we need seperate .db directory?

        try:
            Repo(path)
        except NotGitRepository:
            pass
        else:
            raise RepoInitialised()

        Repo.init(path)
        app = cls(path)

        emptyfiles = ['.index', '.tags', '.toread']

        # ``name`` keeps ``path`` intact: the loop variable used to shadow it,
        # so the files were created in the current directory and the cleanup
        # below targeted '<filename>/.git' instead of the repository
        for name in emptyfiles:

            try:
                with open(osp.join(path, name), 'w'):
                    pass
            except (IOError, OSError) as e:

                from shutil import rmtree
                # TODO: remove created emptyfiles
                rmtree(osp.join(path, '.git'))
                # str(e) instead of e.message, which Python 3 does not have
                raise FileCreationFailed(str(e))

        app.commit(emptyfiles, INIT)
        return app

    def _add_to_index(self, paper):

        """Add the paper to the end of the index."""

        with open(osp.join(self.repo.path, '.index'), 'a') as f:

            # Save path relative to the repo
            path = paper.path
            if self.repo.path != '.' and path.startswith(self.repo.path):
                path = path[len(self.repo.path)+1:]

            f.write('{key}:{ts}:{path}:{ext}:{reftype}\n'.format(
                key=paper.ref.key,
                ts=paper.timestamp,
                path=path,
                ext=paper.ext if paper.ext else 'nofile',
                reftype=type(paper.ref).__name__,
            ))

    def add(self, paperfile, reffunc, reftype=BibRef):

        """Add a new paper to the database, fetching the reference with the
        given reffunc.

        ``reftype`` is the reference class used to deserialize the fetched
        reference.  Raises ``DuplicateKey`` if the key is already present.
        Returns the new ``Paper``."""

        # Honour ``reftype``; BibRef used to be hard-coded here
        ref = reftype.deserialize(reffunc())

        if ref.key in self:
            raise DuplicateKey(ref.key)

        path = osp.join(self.repo.path, ref.type, ref.key)

        paper = Paper.new(path, ref, paperfile)

        self._add_to_index(paper)

        idx_path = osp.join(self.repo.path, '.index')
        if not paperfile:
            self.commit(
                [idx_path, paper.ref_path],
                commit_message(ADD, paper.key)
            )
        else:
            self.commit(
                [idx_path, paper.filepath, paper.ref_path],
                commit_message(ADD, paper.key)
            )

        return paper

    def rm(self, key):

        """Remove the paper from the repo."""
        pass # TODO: stub

    def tag(self, key, suggest=True):

        # NOTE(review): only the beginning of this method is present in this
        # file; it reads the known tags and the paper's tags and then ends.
        paper = self[key]

        with open(osp.join(self.repo.path, '.tags'), 'r') as f:

            all_tags = TagSet(f.read())

            current_tags = paper.tags
예제 #44
0
    # Bare expression: evaluates ``repo`` and discards the value.
    repo
    
    # Open the repository index and print the index file's path.
    index = repo.open_index()
    print(index.path.decode(sys.getfilesystemencoding()))
    
    # Iterates the index; the resulting list is discarded.
    list(index)

    # Write a small test file.
    f = open('unscraper/thisIsATest.md','wb')
    _ = f.write(b"monty")
    f.close()

    # NOTE(review): the staged name b"thisIsATest" differs from the file
    # written above ('unscraper/thisIsATest.md') -- confirm the intended path.
    repo.stage([b"thisIsATest"])

    # Print the index entries as a comma-separated list.
    print(",".join([f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))

    commit_id = repo.do_commit(
        b"testing dulwich", committer=b"Aly <*****@*****.**>")

    # The comparison result is not used (neither asserted nor printed).
    repo.head() == commit_id
    print ('**************************************************')
    print ('******************batch complete******************')
    print ('**************************************************')
    time.sleep(86400) # sleep for 24 hours
# end of program



        

               
def move(path):
    """Rename a stored media file and commit the result to the Git repository.

    The source name is taken from ``path`` and the target name from the
    request's ``Destination`` header.  Both names are sanitised with
    ``secure_filename`` and checked against ``allowed_extention`` before the
    file is renamed inside ``config['MediaRoot']``.

    ``end()`` is assumed to abort the request; every check below already
    relies on that.

    :param path: Requested source file name.
    :return: ``('', 204)`` once the rename has been carried out.  A failing
    Git commit is only logged.
    """
    debug("MOVE with path '%s'" % (path))

    try:
        filename = secure_filename(path)
    except Exception as e:
        debug("secure_filename failed: %s:%s" % (path, str(e)))
        # Nothing below can run without a sanitised name; this used to fall
        # through to a NameError on `filename`.
        end(403, "invalid file name!")

    debug("ok: secure_filename succeed %s" % filename)
    # The source name must contain exactly two dots (a double extension).
    dotCount = filename.count('.')
    if dotCount != 2:
        error("file do not contains 2 dot.")
        end(403, "file not contains 2 dot!")

    debug("ok: file contains just 2 dot.")

    # Combine both suffixes into the extension that is checked.
    root, ext = os.path.splitext(filename)
    first_ext = os.path.splitext(root)[1].lower()
    extension = first_ext + ext
    extension = extension.lower()
    if extension not in allowed_extention:
        error("file extension NOT allowed '%s'." % extension)
        debug("error: allowed %s." % (pp.pformat(allowed_extention)))
        end(403, "file extension not allowed!")

    debug("ok: file extension allowed.")

    basepath = os.path.join(config['MediaRoot'])
    filepath = os.path.join(basepath, filename)
    if not os.path.isdir(basepath):
        debug("error: Folder do not exist %s" % str(basepath))
        end(403, "oups, Folder do not exist '%s'." % (str(basepath)))

    # The source file itself must exist (the message text is kept as-is).
    if not os.path.isfile(filepath):
        debug("error: Folder do not exist %s" % str(filepath))
        end(403, "oups, Folder do not exist '%s'." % (str(filepath)))

    dest = request.headers.get('Destination')
    if not dest:
        # A missing header used to crash inside re.sub() with a TypeError.
        debug("error: Destination header missing")
        end(400, "Destination header missing!")
    # Strip the scheme/host/root prefix so only the target file name remains.
    dest = re.sub(r'^https?://'+realhost+'/'+root_url, '', dest)
    dest = urllib.parse.unquote(dest, encoding='utf-8')

    try:
        destfilename = secure_filename(dest)
    except Exception as e:
        debug("secure_filename failed: %s:%s" % (dest, str(e)))
        end(403, "invalid destination file name!")

    debug("ok: secure_filename succeed %s" % destfilename)
    # The destination name must contain exactly one dot.
    dotCount = destfilename.count('.')
    if dotCount != 1:
        error("destfile do contains 1 dot.")
        end(403, "destfile contains 1 dot!")

    debug("ok: destfile contains just 1 dot.")

    extension = os.path.splitext(destfilename)[1].lower()
    if extension not in allowed_extention:
        error("desfile extension NOT allowed '%s'." % extension)
        debug("error: allowed %s." % (pp.pformat(allowed_extention)))
        end(403, "destfile extension not allowed!")

    # Never overwrite an existing file.
    destfilepath = os.path.join(basepath, destfilename)
    if os.path.isfile(destfilepath):
        debug("File '%s' exist on system." % (destfilepath))
        end(404, "File exist")

    try:
        os.rename(filepath, destfilepath)
    except FileExistsError:
        debug("Could not move file from '%s' to '%s' on system." % (filepath, destfilepath))
        end(403, "Could not move file.")
    except Exception as e:
        debug("error: %s" % str(e))
        debug("Could not move file from '%s' to '%s' on system." % (filepath, destfilepath))
        end(403, "Could not move file.")

    # The interesting stuff now: record the change in Git, creating the
    # repository first when the media root does not have one yet.
    gitbasepath = os.path.join(basepath, '.git')
    if not os.path.isdir(gitbasepath):
        debug("error: Folder do not exist %s" % str(gitbasepath))
        try:
            Repo.init(basepath, mkdir=False)
        except Exception as e:
            debug("Git repo creation failed:%s:%s" % (basepath, str(e)))
    repository = Repo(basepath)

    # Prefer the authenticated user as committer and fall back to the
    # configured one.  Starting from None avoids the unbound-variable error
    # the original hit when no Authorization header was sent.
    real_committer = None
    if 'Authorization' in request.headers:
        real_committer = _committer_from_authorization(
            request.headers.get('Authorization'))
        if real_committer:
            debug("ok: real_committer:%s" % (real_committer))

    if not real_committer:
        # config is a mapping (see config['MediaRoot'] above), not a callable.
        real_committer = config['Committer']
        debug("ok: fake_committer:%s" % (real_committer))

    try:
        # NOTE(review): only the new name is staged, so the old path
        # presumably stays in the index; confirm whether `filename` should be
        # staged as well.
        repository.stage([destfilename])
        # NOTE(review): the media root path is used as the commit message and
        # both arguments are text; confirm this matches the dulwich version in
        # use.
        repository.do_commit(basepath, committer=real_committer)
    except Exception as e:
        debug("Git repo commit failed:%s" % (str(e)))

    return '', 204


def _committer_from_authorization(header_value):
    """Derive 'First Last <first@last.tld>' from a Basic Authorization header.

    :param header_value: Raw header value, expected as ``Basic BASE64``.
    :return: The committer string, or ``None`` when the header cannot be
    parsed.  Returning ``None`` keeps a half-parsed value, possibly the raw
    credentials, from ever being used as committer.
    """
    try:
        # "Authorization: Basic BASE64", where BASE64 decodes to "login:password"
        credentials = base64.b64decode(header_value.split(' ')[1]).decode('utf-8')
        login = urllib.parse.unquote(credentials.split(':', 1)[0], encoding='utf-8')
        # The login is expected to look like "firstname@name.tld".
        firstname, domain = login.split('@', 1)
        name = domain.split('.', 1)[0]
    except Exception as e:
        debug("Git commiter fetch: failed:%s" % (str(e)))
        return None
    return firstname.title() + ' ' + name.title() + ' <' + login + '>'
예제 #46
0
class Store(object):
    """
    A simple key/value store using git as the backing store.
    """
    def __init__(self, repo_path, serializer=None):
        """Open the repository at ``repo_path`` as the backing store.

        :param repo_path: Filesystem path of an existing repository.
        :param serializer: Object used for ``loads``/``dumps`` of values; the
        ``json`` module is used when a falsy value is given.
        :raises ValueError: If ``repo_path`` does not exist.
        """
        if not os.path.exists(repo_path):
            raise ValueError("Store repo path does not exist: %s" % repo_path)
        self.repo = Repo(repo_path)
        self.serializer = serializer if serializer else json

    def create_branch(self, branch, parent=None):
        """Register ``branch`` through ``refs.add_if_new``.

        :param branch: Branch name, with or without the ``refs/heads/`` prefix.
        :param parent: Sha the branch should start from; the head of
        ``master`` is used when falsy.
        :return: A dict ``{'sha': ...}`` with the branch head after the call.
        """
        start_point = parent if parent else self.branch_head('master')
        ref_name = self._branch_ref_name(branch)
        self.repo.refs.add_if_new(ref_name, start_point)
        return {'sha': self.branch_head(branch)}

    def merge(self,
              source_branch,
              target_branch='master',
              author=None,
              committer=None):
        """Apply the tree differences of ``source_branch`` onto ``target_branch``.

        Added and modified entries are written into the target tree, deleted
        entries are removed from it; copies, renames and unchanged entries are
        ignored.  The result is committed on the target branch with the source
        branch head as merge head.

        :param source_branch: Branch whose changes are merged.
        :param target_branch: Branch receiving the changes.
        :param author: Passed through to ``do_commit``.
        :param committer: Passed through to ``do_commit``.
        :return: A dict ``{'sha': ...}`` with the sha of the merge commit.
        :raises ValueError: If both branch names are equal.
        """
        if source_branch == target_branch:
            raise ValueError("Cannot merge branch with itself %s" %
                             source_branch)

        target_tree = self._get_object(ROOT_PATH, target_branch)
        branch_tree = self._get_object(ROOT_PATH, source_branch)

        upserts = (diff_tree.CHANGE_ADD, diff_tree.CHANGE_MODIFY)
        changes = diff_tree.tree_changes(self.repo.object_store,
                                         target_tree.id, branch_tree.id)
        for change in changes:
            if change.type in upserts:
                new = change.new
                self._add_tree(target_tree, ((new.path, new.sha, new.mode), ))
            elif change.type == diff_tree.CHANGE_DELETE:
                target_tree = self._delete(change.old.path, target_branch)
            # Copies, renames and unchanged entries need no action.

        commit_sha = self.repo.do_commit(
            tree=target_tree.id,
            message="Merge %s to %s" % (source_branch, target_branch),
            ref=self._branch_ref_name(target_branch),
            merge_heads=[self.branch_head(source_branch)],
            author=author,
            committer=committer)
        return {'sha': commit_sha}

    def get(self, key, shallow=False, branch='master', commit_sha=None):
        """
        Look up ``key`` (a path such as 'a/b/c') in the store.

        A Blob is returned deserialized through the store's serializer; a Tree
        is returned as a python dict built by ``trees``.

        :param key: The key to retrieve from the store
        :param shallow: When true, limit the tree traversal depth to the
        number of path segments in ``key``
        :param branch: The branch name to search for the requested key
        :param commit_sha: Optional commit to read from instead of the branch head
        :return: The deserialized value, a dict, or None when nothing (or
        nothing truthy) is stored under the key
        """
        obj = self._get_object(key, branch, commit_sha)
        if not obj:
            return None
        if isinstance(obj, Blob):
            return self.serializer.loads(obj.data)
        if not isinstance(obj, Tree):
            return None

        segments = key.split('/')
        max_depth = len(segments) if shallow else None
        result = self.trees(key, depth=max_depth, branch=branch)
        if segments != [ROOT_PATH]:
            # Descend to the requested sub-dict.
            for segment in segments:
                result = result[segment]
        return result

    def _get_object(self, key, branch='master', commit_sha=None):
        """Return the git object stored under ``key``, or None on ``KeyError``.

        :param key: Path looked up in the commit's tree.
        :param branch: Branch whose head is used when ``commit_sha`` is falsy.
        :param commit_sha: Commit to read from.
        """
        try:
            head = commit_sha if commit_sha else self.branch_head(branch)
            _mode, object_sha = tree_lookup_path(self.repo.get_object,
                                                 self._repo_tree(head), key)
            return self.repo[object_sha]
        except KeyError:
            return None

    def put(self,
            key,
            value,
            flatten_keys=True,
            branch='master',
            author=None,
            committer=None):
        """
        Store ``value`` under ``key`` and commit the result on ``branch``.

        With ``flatten_keys`` the ``{key: value}`` mapping is first passed
        through ``flatten``, so one call may write several entries.  When the
        branch has no root tree, the default branch's root tree is used and
        the head of ``master`` becomes a merge head of the commit.

        :param key: The key to store the entry/entries in
        :param value: The value to store.
        :param flatten_keys: Whether to run ``flatten`` on the mapping first.
        :param branch: Branch the commit is made on.
        :param author: Passed through to ``do_commit``.
        :param committer: Passed through to ``do_commit``.
        :return: A dict ``{'sha': ...}`` with the sha of the new commit.
        """
        entries = {key: value}
        if flatten_keys:
            entries = flatten(entries)

        merge_heads = []
        root_tree = self._get_object(ROOT_PATH, branch)
        if not root_tree:
            root_tree = self._get_object(ROOT_PATH)
            merge_heads = [self.branch_head('master')]

        blobs = []
        message_lines = []
        for entry_key, entry_value in entries.iteritems():
            blob = Blob.from_string(self.serializer.dumps(entry_value))
            self.repo.object_store.add_object(blob)
            blobs.append((entry_key, blob.id, stat.S_IFREG))
            message_lines.append("Put %s\n" % entry_key)

        root_id = self._add_tree(root_tree, blobs)
        commit_sha = self.repo.do_commit(tree=root_id,
                                         message=''.join(message_lines),
                                         ref=self._branch_ref_name(branch),
                                         merge_heads=merge_heads,
                                         author=author,
                                         committer=committer)
        return {'sha': commit_sha}

    def delete(self, key, branch='master', author=None, committer=None):
        """
        Remove ``key`` from the store and commit the result on ``branch``.

        When nothing truthy is found under the key on ``branch``, the deletion
        is computed against ``master`` and the head of ``master`` becomes a
        merge head of the commit.

        :param key: The key to remove from the store.
        :param branch: Branch the commit is made on.
        :param author: Passed through to ``do_commit``.
        :param committer: Passed through to ``do_commit``.
        :return: A dict ``{'sha': ...}`` with the sha of the new commit.
        """
        if self._get_object(key, branch):
            merge_heads, source_branch = [], branch
        else:
            merge_heads, source_branch = [self.branch_head('master')], 'master'

        new_root = self._delete(key, source_branch)
        commit_sha = self.repo.do_commit(tree=new_root.id,
                                         message="Delete %s" % key,
                                         ref=self._branch_ref_name(branch),
                                         merge_heads=merge_heads,
                                         author=author,
                                         committer=committer)
        return {'sha': commit_sha}

    def _delete(self, key, branch='master'):
        """Remove ``key`` from the trees of ``branch`` and return the new root.

        The ancestor trees of ``key`` are loaded, the entry (or, when ``key``
        has no final component, every entry of that tree) is deleted, and the
        modified trees are re-linked and added to the object store from the
        bottom up.  No commit is created here.

        :param key: Path of the entry to remove.
        :param branch: Branch the trees are read from.
        :return: The updated root Tree object.
        """
        subtrees = {}
        remaining = key
        if not remaining:
            subtrees[ROOT_PATH] = self._get_object(ROOT_PATH, branch)
        while remaining:
            # Walk upwards, loading every ancestor tree of the key.
            (remaining, _name) = pathsplit(remaining)
            subtrees[remaining] = self._get_object(remaining, branch)

        (parent, leaf) = pathsplit(key)
        if leaf:
            del subtrees[parent][leaf]
        else:
            for entry in subtrees[parent].iteritems():
                del subtrees[parent][entry.path]

        while parent:
            # Re-link each modified tree into its parent, then store it.
            (grandparent, child) = pathsplit(parent)
            subtrees[grandparent].add(child, stat.S_IFDIR, subtrees[parent].id)
            self.repo.object_store.add_object(subtrees[parent])
            parent = grandparent
        self.repo.object_store.add_object(subtrees[ROOT_PATH])
        return subtrees[ROOT_PATH]

    def _repo_tree(self, commit_sha):
        """Return the ``tree`` attribute of the object stored under ``commit_sha``."""
        commit = self.repo[commit_sha]
        return commit.tree

    def keys(self,
             path=ROOT_PATH,
             pattern=None,
             depth=None,
             filter_by=None,
             branch='master',
             commit_sha=None):
        """
        Return the keys found below ``path``, in traversal order.

        :param path: The starting point to retrieve key paths from.  Defaults
        to the root of the store.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: How deep to recurse; None does a full traversal.
        :param filter_by: Either 'blob', 'tree' or None.  Restricts the result
        to keys of Blob nodes or Tree nodes; None keeps all keys.
        :param branch: The branch name to return key paths for.
        :param commit_sha: Optional commit to read from instead of the branch head.
        :return: The keys of the selected entries.
        """
        def is_blob(tree_entry):
            return isinstance(tree_entry[1], Blob)

        def is_tree(tree_entry):
            return isinstance(tree_entry[1], Tree)

        def key_of(tree_entry):
            return tree_entry[0]

        predicate = None
        if filter_by == 'blob':
            predicate = is_blob
        elif filter_by == 'tree':
            predicate = is_tree

        selected = filter(
            predicate,
            self.raw_entries(path, pattern, depth, branch, commit_sha))
        return map(key_of, selected)

    def entries(self,
                path=ROOT_PATH,
                pattern=None,
                depth=None,
                branch='master',
                commit_sha=None):
        """Yield ``(key, value)`` for every Blob produced by ``raw_entries``.

        Each value is the blob data converted with ``str`` and deserialized
        through the store's serializer; Tree entries are skipped.
        """
        for key, obj in self.raw_entries(path, pattern, depth, branch,
                                         commit_sha):
            if not isinstance(obj, Blob):
                continue
            yield (key, self.serializer.loads(str(obj.data)))

    def raw_entries(self,
                    path=ROOT_PATH,
                    pattern=None,
                    depth=None,
                    branch='master',
                    commit_sha=None):
        """
        Return a generator over ``(tree_path, git_object)`` pairs below ``path``.

        :param path: String key to begin producing result entries from.
        Defaults to the root of the store.
        :param pattern: Regex pattern to filter matching tree paths; a falsy
        value selects ``MATCH_ALL``.
        :param depth: How deep to recurse; None does a full traversal.
        :param branch: Git branch name to return key paths for.
        :param commit_sha: Optional commit to read from instead of the branch head.
        :return: A generator that produces entries of the form (tree_path, git_object)
        :raises ValueError: If ``path`` does not resolve to a Tree.
        """
        root = self._get_object(path, branch, commit_sha)
        if not isinstance(root, Tree):
            raise ValueError("Path %s is not a tree!" % path)
        matcher = pattern if pattern else MATCH_ALL
        return self._entries(path, root, matcher, depth)

    def _entries(self, path, tree, pattern, depth=None):
        """Recursively yield ``(key, object)`` for the entries of ``tree``.

        An entry is yielded when ``pattern`` matches its key.  Sub-trees are
        always descended into when ``depth`` is falsy, and otherwise only
        while more than one level remains.
        """
        for tree_entry in tree.iteritems():
            obj = self.repo[tree_entry.sha]
            key = self._tree_entry_key(path, tree_entry)
            if pattern.match(key):
                yield (key, obj)

            if not isinstance(obj, Tree):
                continue
            if not depth:
                child_depth = depth
            elif depth > 1:
                child_depth = depth - 1
            else:
                # Depth budget used up: do not descend further.
                continue
            for te in self._entries(key, obj, pattern, child_depth):
                yield te

    def trees(self,
              path=ROOT_PATH,
              pattern=None,
              depth=None,
              object_depth=None,
              branch='master',
              commit_sha=None):
        """
        Build a python dict from the entries found below ``path``.

        Every ``(key, value)`` pair produced by ``entries`` is merged into the
        result with ``expand_tree``.

        :param path: String key to begin building the dict from.  Defaults to
        the root of the store.
        :param pattern: Regex pattern to filter matching tree paths.
        :param depth: How deep to recurse; None does a full traversal.
        :param object_depth: Passed through to ``expand_tree``.
        :param branch: Git branch name to read from.
        :param commit_sha: Optional commit to read from instead of the branch head.
        :return: A dict representing a section of the store.
        """
        result = {}
        for entry_path, value in self.entries(path, pattern, depth, branch,
                                              commit_sha):
            expand_tree(entry_path, value, result, object_depth)
        return result

    def _tree_entry_key(self, path, tree_entry):
        """Return the entry's path, prefixed with ``path`` and '/' when ``path`` is truthy."""
        if not path:
            return tree_entry.path
        return "%s/%s" % (path, tree_entry.path)

    def _branch_ref_name(self, name):
        """Return ``name`` as a full ref name, adding 'refs/heads/' when missing."""
        if name.startswith('refs/heads/'):
            return name
        return "refs/heads/%s" % name

    def branch_head(self, name):
        """Return the value the repository's refs hold for branch ``name``."""
        ref_name = self._branch_ref_name(name)
        return self.repo.refs[ref_name]

    def _add_tree(self, root_tree, blobs):
        """Write the given blob entries into ``root_tree`` and store the trees.

        The entries are first grouped into a nested dict keyed by directory
        path, then the affected Tree objects are built recursively and added
        to the repository's object store.  ``root_tree`` is modified in place.

        :param root_tree: Root tree to add trees to
        :param blobs: Iterable over blob path, sha, mode entries
        :return: SHA1 of the created tree.
        """
        # Maps a directory path to a dict of basename -> nested dict (for a
        # sub-directory) or (mode, sha) tuple (for a blob).  "" is the root.
        trees = {"": {}}

        def add_tree(path):
            # Return the dict for `path`, creating it and any missing parent
            # dicts on the way.
            if path in trees:
                return trees[path]
            dirname, basename = pathsplit(path)
            t = add_tree(dirname)
            assert isinstance(basename, basestring)
            newtree = {}
            t[basename] = newtree
            trees[path] = newtree
            return newtree

        # Register every blob in the dict of its parent directory.
        for path, sha, mode in blobs:
            tree_path, basename = pathsplit(path)
            tree = add_tree(tree_path)
            tree[basename] = (mode, sha)

        def build_tree(path):
            # Build (or update) the Tree object for `path` and return its id.
            if path:
                # _get_object is called without a branch, so it reads from its
                # default branch.
                tree = self._get_object(path)
                if not tree:
                    tree = Tree()
                if not isinstance(tree, Tree):
                    # Something that is not a tree lives at this path: remove
                    # it (self.delete commits that removal) and start fresh.
                    self.delete(path)
                    tree = Tree()
            else:
                tree = root_tree
            for basename, entry in trees[path].iteritems():
                if type(entry) == dict:
                    # Sub-directory: build it first to obtain its sha.
                    mode = stat.S_IFDIR
                    sha = build_tree(pathjoin(path, basename))
                else:
                    (mode, sha) = entry
                tree.add(basename, mode, sha)
            self.repo.object_store.add_object(tree)
            return tree.id

        return build_tree("")