Exemple #1
0
    def test_simple_local(self):
        """Clone a local repository with ``checkout=False`` and inspect the clone.

        Checks that the clone has an empty index, carries the ``foo`` tag,
        has no ``f1``/``f2`` entries in its directory, and records the source
        repository as remote ``origin`` with the default fetch refspec.
        """
        f1_1 = make_object(Blob, data=b'f1')
        commit_spec = [[1], [2, 1], [3, 1, 2]]
        trees = {1: [(b'f1', f1_1), (b'f2', f1_1)],
                 2: [(b'f1', f1_1), (b'f2', f1_1)],
                 3: [(b'f1', f1_1), (b'f2', f1_1)], }

        c1, c2, c3 = build_commit_graph(self.repo.object_store,
                                        commit_spec, trees)
        self.repo.refs[b"refs/heads/master"] = c3.id
        self.repo.refs[b"refs/tags/foo"] = c3.id
        target_path = tempfile.mkdtemp()
        errstream = BytesIO()
        self.addCleanup(shutil.rmtree, target_path)
        r = porcelain.clone(self.repo.path, target_path,
                            checkout=False, errstream=errstream)
        self.addCleanup(r.close)
        self.assertEqual(r.path, target_path)
        target_repo = Repo(target_path)
        # Cleanups run in reverse order, so this handle is closed before the
        # directory is removed.
        self.addCleanup(target_repo.close)
        self.assertEqual(0, len(target_repo.open_index()))
        self.assertEqual(c3.id, target_repo.refs[b'refs/tags/foo'])
        # mkdtemp() returns a text path, so os.listdir() yields text names;
        # comparing against bytes names could never fail on Python 3.
        entries = os.listdir(target_path)
        self.assertNotIn('f1', entries)
        self.assertNotIn('f2', entries)
        c = r.get_config()
        encoded_path = self.repo.path
        if not isinstance(encoded_path, bytes):
            encoded_path = encoded_path.encode('utf-8')
        self.assertEqual(encoded_path, c.get((b'remote', b'origin'), b'url'))
        self.assertEqual(
            b'+refs/heads/*:refs/remotes/origin/*',
            c.get((b'remote', b'origin'), b'fetch'))
Exemple #2
0
    def dulwichCommit(self, filePath, fullPath, kind):
        """Stage ``filePath`` in the autogit repository and commit when needed.

        A commit is made when the repository has no HEAD yet, or when the
        staged tree differs from the tree of HEAD.  ``kind`` is interpolated
        into the commit message; ``fullPath`` is not used here.

        Returns whatever ``do_commit`` returns, or None when nothing changed.
        """
        git = Repo(AUTOGIT_PATH)
        git.stage(map(str, [filePath]))

        index = git.open_index()

        try:
            committer = git._get_user_identity()
        except ValueError:
            # No usable identity available: fall back to a fixed name.
            committer = "autogit"

        def commit_now():
            return git.do_commit('%s - autogit commit (via dulwich)' % kind,
                                 committer=committer)

        try:
            git.head()
        except KeyError:
            # No HEAD to compare against, so commit unconditionally.
            return commit_now()

        staged_tree = index.commit(git.object_store)
        head_tree = git['HEAD'].tree
        if list(tree_changes(git, staged_tree, head_tree)):
            return commit_now()
        return None
Exemple #3
0
    def _clone_submodules(cls, repo: Repo) -> None:
        """
        Clone each submodule listed in the repository's ``.gitmodules`` file.

        Every submodule is cloned through ``cls.clone`` at the revision that
        the repository index records for the submodule path. Does nothing when
        there is no ``.gitmodules`` file at the repository root.
        """
        repo_root = Path(repo.path)
        modules_config = repo_root.joinpath(".gitmodules")

        if not modules_config.exists():
            return

        config = ConfigFile.from_path(modules_config)

        url: bytes
        path: bytes
        for path, url, _ in parse_submodules(config):  # type: ignore[no-untyped-call]
            relative = Path(path.decode("utf-8"))
            absolute = repo_root.joinpath(relative)

            # Make sure the directory that will contain the clone exists.
            parent_dir = absolute.parent
            parent_dir.mkdir(parents=True, exist_ok=True)

            with repo:
                index_entry = repo.open_index()[path]
                revision = index_entry.sha.decode("utf-8")

            cls.clone(
                url=url.decode("utf-8"),
                source_root=parent_dir,
                name=relative.name,
                revision=revision,
                # Request a clean clone when the target exists but has no
                # ``.git`` directory.
                clean=absolute.exists() and not absolute.joinpath(".git").is_dir(),
            )
Exemple #4
0
    def test_simple_local(self):
        """Clone a local repository with ``checkout=False`` and inspect the clone.

        Checks that the clone has an empty index, carries the ``foo`` tag,
        has no ``f1``/``f2`` entries in its directory, and records the source
        repository as remote ``origin`` with the default fetch refspec.
        """
        f1_1 = make_object(Blob, data=b'f1')
        commit_spec = [[1], [2, 1], [3, 1, 2]]
        trees = {
            1: [(b'f1', f1_1), (b'f2', f1_1)],
            2: [(b'f1', f1_1), (b'f2', f1_1)],
            3: [(b'f1', f1_1), (b'f2', f1_1)],
        }

        c1, c2, c3 = build_commit_graph(self.repo.object_store, commit_spec,
                                        trees)
        self.repo.refs[b"refs/heads/master"] = c3.id
        self.repo.refs[b"refs/tags/foo"] = c3.id
        target_path = tempfile.mkdtemp()
        errstream = BytesIO()
        self.addCleanup(shutil.rmtree, target_path)
        r = porcelain.clone(self.repo.path,
                            target_path,
                            checkout=False,
                            errstream=errstream)
        # Release both repository handles; cleanups run in reverse order, so
        # they are closed before the directory is removed.
        self.addCleanup(r.close)
        self.assertEqual(r.path, target_path)
        target_repo = Repo(target_path)
        self.addCleanup(target_repo.close)
        self.assertEqual(0, len(target_repo.open_index()))
        self.assertEqual(c3.id, target_repo.refs[b'refs/tags/foo'])
        # mkdtemp() returns a text path, so os.listdir() yields text names;
        # comparing against bytes names could never fail on Python 3.
        entries = os.listdir(target_path)
        self.assertNotIn('f1', entries)
        self.assertNotIn('f2', entries)
        c = r.get_config()
        encoded_path = self.repo.path
        if not isinstance(encoded_path, bytes):
            encoded_path = encoded_path.encode('utf-8')
        self.assertEqual(encoded_path, c.get((b'remote', b'origin'), b'url'))
        self.assertEqual(b'+refs/heads/*:refs/remotes/origin/*',
                         c.get((b'remote', b'origin'), b'fetch'))
Exemple #5
0
 def _dulwich_status(self):
     """
     Return the differences between the staged tree and the HEAD tree.

     The repository is the one located at ``self.config['top_dir']``; the
     result is the list produced by ``tree_changes``.
     """
     repo = Repo(self.config['top_dir'])
     staged_tree = repo.open_index().commit(repo.object_store)
     head_tree = repo['HEAD'].tree
     return list(tree_changes(repo, staged_tree, head_tree))
Exemple #6
0
 def commit(repo: Repo, msg: str) -> str:
     """Re-stage every tracked file whose content changed, then commit.

     Each index entry is compared with a blob built from the file on disk;
     entries whose sha differs are staged again before committing ``msg``.
     """
     index = repo.open_index()
     for tree_path, entry in index.items():
         full_path = os.path.join(repo.path.encode(), tree_path)
         on_disk = blob_from_path_and_stat(full_path, os.lstat(full_path))
         if on_disk.id == entry.sha:
             # Working copy matches the index entry: nothing to stage.
             continue
         repo.stage(tree_path)
     return repo.do_commit(msg.encode(), b"Source{d} ML Team <*****@*****.**>")
Exemple #7
0
class GitWhoosh:
    """Whoosh full-text index over the blobs listed in a git index.

    The instance is also a WSGI callable: the raw query string is unquoted,
    used as the search query, and the matches are returned as JSON.
    """

    def __init__(self, repos_path, index_path):
        """Open the repository at ``repos_path`` and the Whoosh index at
        ``index_path``, creating the Whoosh index when it does not exist."""
        self.repo = Repo(repos_path)
        self.index_path = index_path
        self.git_index = self.repo.open_index()
        if not exists_in(self.index_path):
            schema = Schema(path=ID(unique=True, stored=True), itime=STORED, content=TEXT)
            self.ix = create_in(self.index_path, schema)
        else:
            self.ix = open_dir(self.index_path)

    def hook_index(self, func, path):
        """Call ``func`` (a writer method) with the document for ``path``.

        ``path`` is a key of the git index; the document carries the decoded
        path, the decoded blob content and the entry's field 1 as ``itime``.
        """
        entry = self.git_index[path]
        mtime = entry[1]
        sha = entry[8]
        blob = self.repo[sha].as_raw_string()
        func(path=path.decode('utf-8'), content=blob.decode('utf-8'), itime=mtime)

    def index(self, regexp=None):
        """Synchronise the Whoosh index with the git index.

        Documents whose path left the git index are deleted, new paths are
        added, and paths whose git-index time is newer than the stored
        ``itime`` are updated.  When ``regexp`` is given, only git paths
        matching it are added or updated.
        """
        with self.ix.searcher() as searcher:
            writer = self.ix.writer()
            try:
                # first of all, check for removed items
                paths = {}
                for fields in searcher.all_stored_fields():
                    stored_path = fields['path']
                    paths[stored_path] = fields['itime']
                    # Stored paths are text while git index keys are bytes:
                    # encode before the membership test.
                    if stored_path.encode('utf-8') not in self.git_index:
                        writer.delete_by_term('path', stored_path)
                # now check for new or updated items
                for path in self.git_index:
                    if regexp and not re.search(regexp, path):
                        continue
                    # Use the decoded form for both the membership test and
                    # the lookup, so the two can never disagree.
                    key = path.decode('utf-8')
                    if key in paths:
                        if self.git_index[path][1] > paths[key]:
                            self.hook_index(writer.update_document, path)
                    else:
                        self.hook_index(writer.add_document, path)
            except Exception:
                # Release the writer instead of leaving it open on failure.
                writer.cancel()
                raise
            writer.commit()

    def search(self, query):
        """Search the ``content`` field and return a list of
        ``{'path': ..., 'terms': [...]}`` dicts, one per hit."""
        if isinstance(query, bytes):
            query = query.decode('utf-8')
        parser = QueryParser('content', schema=self.ix.schema)
        q = parser.parse(query)
        found_items = []
        with self.ix.searcher() as searcher:
            results = searcher.search(q, terms=True)
            for r in results:
                terms = [term[1] for term in r.matched_terms()]
                found_items.append({'path':r['path'], 'terms':terms})
        return found_items

    def __call__(self, environ, start_response):
        """WSGI entry point: answer with the JSON-encoded search results for
        the request's query string (an empty list when there is none)."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        output = []
        qs = environ.get('QUERY_STRING', None)
        if qs:
            output = self.search(urllib.unquote(qs))
        body = json.dumps(output)
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        # WSGI iterates over the return value; a bare string would be sent
        # one character at a time, so wrap the body in a list.
        return [body]
Exemple #8
0
    def _ls_root(self, workspace=None):
        """List the blobs of the workspace's git index.

        Returns a ``(stdout, stderr)`` pair of bytes: one tab-separated line
        per index blob, and an empty error stream.  ``workspace`` is unused;
        the repository is taken from ``self.workspace.working_dir``.
        """
        from dulwich.repo import Repo
        buffer = StringIO()
        tracked = Repo(self.workspace.working_dir).open_index()
        buffer.writelines(
            '\t'.join(str(field) for field in blob) + '\n'
            for blob in tracked.iterblobs())

        return buffer.getvalue().encode(), b''
Exemple #9
0
 def _dulwich_status(self):
     """
     Return the list of changes between the index and HEAD.

     Opens the repository at ``self.config['top_dir']`` and compares the
     tree written from its index with the tree of ``HEAD``.
     """
     repo = Repo(self.config['top_dir'])
     index = repo.open_index()
     changes = tree_changes(repo, index.commit(repo.object_store),
                            repo['HEAD'].tree)
     return [change for change in changes]
Exemple #10
0
    def _ls_root(self, workspace=None):
        """Dump the git index of ``self.workspace.working_dir`` as text.

        Every blob reported by the index becomes one line of tab-separated
        fields.  Returns ``(encoded_listing, b'')``; the ``workspace``
        argument is not consulted.
        """
        from dulwich.repo import Repo
        outstream = StringIO()
        repo = Repo(self.workspace.working_dir)
        for blob in repo.open_index().iterblobs():
            fields = [str(field) for field in blob]
            outstream.write('\t'.join(fields) + '\n')

        listing = outstream.getvalue()
        return listing.encode(), b''
Exemple #11
0
def do_import(commits, repo_loc, overwrite = True, author_="Règlement général <*****@*****.**>"):
    """Replay ``commits`` into the git repository at ``repo_loc``.

    Each element of ``commits`` is a sequence whose first item is a datetime,
    whose second item is sized for reporting, and whose optional third item is
    passed to ``create_tree`` as ``main``.  For every element the tree is
    written with ``create_tree``, added paths are staged, removed paths are
    dropped from the index, a commit is created and an annotated tag named
    after the date is attached to it.

    :param commits: sequence of commit descriptions as described above
    :param repo_loc: directory of the repository
    :param overwrite: when true, an existing ``repo_loc`` is deleted and
        re-initialised; otherwise the existing repository is reused
    :param author_: identity used as committer and tagger
    """
    already_exists = exists(repo_loc)
    if already_exists and not overwrite:
        repo = Repo(repo_loc)
    else:
        if already_exists:
            print("Deleting existing output directory: %s" % repo_loc)
            shutil.rmtree(repo_loc)
        os.mkdir(repo_loc)
        repo = Repo.init(repo_loc)

    # Loop invariants.
    author = bytes(author_, "UTF-8")
    fs_encoding = sys.getfilesystemencoding()

    try:
        print("Importing %d commit(s)" % len(commits))

        for i, commit in enumerate(commits):
            date = commit[0]
            print("Commit %d dated %s, %d items" % (i, str(date), len(commit[1])))
            print("  authored by %s" % author_)
            paths_added, paths_removed = create_tree(commit, repo_loc, readme=False, main=commit[2] if len(commit) == 3 else {})
            repo.stage([path.encode(fs_encoding) for path in set(paths_added)])

            index = repo.open_index()

            print("  Removing %d files" % len(paths_removed))
            for p in paths_removed:
                del index[p.encode(fs_encoding)]
            index.write()

            # UTC offset in seconds, computed once per commit and shared by
            # the commit and its tag.
            tz_offset = int(TZ_PARIS.localize(date).utcoffset().total_seconds())

            repo.do_commit(
                bytes("Version du %s" % date.strftime(FMT), "UTF-8"),
                committer=author,
                commit_timestamp=date.timestamp(),
                commit_timezone=tz_offset)

            ## create tag
            tag_name = bytes(date.strftime(ISO_8601), "UTF-8")
            head_obj = parse_object(repo, "HEAD")
            tag = Tag()
            tag.tagger = author
            tag.name = tag_name
            tag.message = b''
            tag.object = (type(head_obj), head_obj.id)
            tag.tag_time = int(time.time())
            tag.tag_timezone = tz_offset
            repo.object_store.add_object(tag)

            repo.refs[b'refs/tags/' + tag_name] = tag.id
    finally:
        # Close the repository even when an import step fails.
        repo.close()
Exemple #12
0
def main(args, hear, talk, complain):
    """Reset the boring changes staged in the repository in the current directory.

    See the doc-string of this file for an outline.

    ``args``, ``hear``, ``talk`` and ``complain`` stand for sys.argv,
    sys.stdin, sys.stdout and sys.stderr (or test doubles for them).  The only
    option looked for in ``args`` is ``--disclaim``, which makes every change
    in a file that ``Scanner.disclaimed`` accepts count as boring.

    Paths that get restored or discarded are written to ``talk``, one per
    line.  Returns 0 on success and 1 when an IOError occurs.
    """
    if '--disclaim' in args:
        ignore = Scanner.disclaimed
    else:
        def ignore(path, write):
            return False

    # We're in the root directory of the module:
    repo = Repo('.')
    store = repo.object_store
    index = repo.open_index()
    renamer = RenameDetector(store)
    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different.  Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        head_tree = store[repo.refs['HEAD']].tree
        staged_tree = index.commit(store)
        for kind, old, new in renamer.changes_with_renames(head_tree,
                                                           staged_tree):
            # old and new each expose .path, .mode and .sha; kind names the
            # change type ('add', 'modify', 'delete', 'rename', 'copy',
            # 'unchanged' - the last is not expected here).  A new.path of
            # None means the file was removed rather than renamed; a new
            # file carrying a disclaimer is private whatever its name.
            if new.path and not ignore(new.path, complain.write):
                assert kind not in ('unchanged', 'delete'), kind
                if kind == 'add':
                    continue
                # Filter out boring changes
                selector = Selector(store, new.sha, old.sha,
                                    old.mode or new.mode)
                index[new.path] = selector.refine()
            elif old.path:  # disclaimed or removed: ignore by restoring
                assert new.path or kind == 'delete', (kind, new.path)
                index[old.path] = Selector.restore(store[old.sha], old.mode)
                talk.write(old.path + '\n')
                if new.path and new.path != old.path:
                    talk.write(new.path + '\n')
            else:  # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]
                talk.write(new.path + '\n')

        index.write()
    except IOError:  # ... and any other errors that just mean failure.
        return 1
    return 0
Exemple #13
0
def main(args, hear, talk, complain):
    """Rewrite the index of the repository in '.' to drop boring changes.

    With ``--disclaim`` in ``args``, ``Scanner.disclaimed`` decides which
    paths are ignored; otherwise nothing is ignored.  ``hear`` and ``talk``
    are accepted but not used here.  Returns 0 on success and 1 when an
    IOError occurs.
    """
    # Future: we may want to parse more args, query the user or wrap
    # talk, complain for verbosity control.
    if '--disclaim' in args:
        ignore = Scanner.disclaimed
    else:
        def ignore(path, write):
            return False

    # We're in the root directory of the module:
    repo = Repo('.')
    store = repo.object_store
    index = repo.open_index()
    renamer = RenameDetector(store)
    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different.  Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        committed = store[repo.refs['HEAD']].tree
        staged = index.commit(store)
        for kind, old, new in renamer.changes_with_renames(committed, staged):
            # old and new each expose .path, .mode and .sha; kind names the
            # change type ('add', 'modify', 'delete', 'rename', 'copy',
            # 'unchanged' - the last is not expected here).  A new.path of
            # None means the file was removed rather than renamed; a new
            # file carrying a disclaimer is private whatever its name.
            keep = new.path and not ignore(new.path, complain.write)
            if keep:
                assert kind not in ('unchanged', 'delete'), kind
                if kind != 'add':
                    # Filter out boring changes
                    mode = old.mode or new.mode
                    index[new.path] = Selector(store, new.sha, old.sha,
                                               mode).refine()
            elif old.path:  # disclaimed or removed: ignore by restoring
                assert new.path or kind == 'delete', (kind, new.path)
                original = store[old.sha]
                index[old.path] = Selector.restore(original, old.mode)
            else:  # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]

        index.write()
    except IOError:  # ... and any other errors that just mean failure.
        return 1
    return 0
Exemple #14
0
def initdGit(path):
    ''' Takes current working directory (within repository) as argument.
        initdGit builds dgit path and files, determines whether repository
        needs dgit initialization,whether directory is already a git
        repository, adds space for user foaf URI in local config file,
        and calls function to document repository in .dgit/provenance.ttl
    '''
    
    os.chdir(path)
    needsInit = False

    try: #if error is not raised, repository needs dgit init
        dgitpath = ".dgit"
        dgitProv = dgitpath + "/provenance.ttl"
        os.mkdir(dgitpath)
        os.system('touch %s'%dgitProv)
        needsInit = True 
    except:
        print '''Repository already initialized.
To reinitialize Git, please use git init command'''

    if needsInit:
    
        repoExists=False
        try: #if error is not raised, git repository alreaded inited
            repo = Repo(path)
            repoExists=True 
        except:
            initNewRepo(path,dgitProv) #calls function to initialize git
        
        if repoExists:
            os.system('git config user.foaf "" ') #create space in local config for foaf URI
            if not list(repo.open_index()): 
                newRepo=True #if no files are tracked in repo, assumes bare repository
            else:
                newRepo=False 
            writeProv(path,newRepo) #calls function to write provenance
            repo.stage([dgitProv]) #stages provenance (via dulwich)
            print '''Repository converted to dgit repository,
Exemple #15
0
	def dulwichCommit(self, filePath, fullPath, kind):
		"""Stage ``filePath`` in the autogit repository and commit when needed.

		Commits when there is no HEAD yet or when the staged tree differs
		from the HEAD tree; returns None when nothing changed.  ``fullPath``
		is not used here.
		"""
		git = Repo(AUTOGIT_PATH)
		git.stage(map(str, [filePath]))

		index = git.open_index()

		try:
			committer = git._get_user_identity()
		except ValueError:
			# No usable identity available: fall back to a fixed name.
			committer = "autogit"

		try:
			git.head()
			has_head = True
		except KeyError:
			has_head = False

		if has_head:
			staged_tree = index.commit(git.object_store)
			pending = list(tree_changes(git, staged_tree, git['HEAD'].tree))
			if not pending:
				return None
		return git.do_commit( '%s - autogit commit (via dulwich)' % kind, committer=committer)
Exemple #16
0
class Gittle(object):
    """Wrapper around a dulwich repository.

    All paths used in Gittle external methods must be paths relative to the
    git repository.
    """
    # Defaults used when a caller does not supply a value
    DEFAULT_COMMIT = 'HEAD'
    DEFAULT_BRANCH = 'master'
    DEFAULT_REMOTE = 'origin'
    DEFAULT_MESSAGE = '**No Message**'
    DEFAULT_USER_INFO = {
        'name': None,
        'email': None,
    }

    # Diff implementations, keyed by diff type name
    # ('dict' and 'changes' map to the same function)
    DIFF_FUNCTIONS = {
        'classic': utils.git.classic_tree_diff,
        'dict': utils.git.dict_tree_diff,
        'changes': utils.git.dict_tree_diff
    }
    DEFAULT_DIFF_TYPE = 'dict'

    # Regexes for paths that are always hidden
    HIDDEN_REGEXES = [
        # Hide git directory
        r'.*\/\.git\/.*',
    ]

    # References
    REFS_BRANCHES = 'refs/heads/'
    REFS_REMOTES = 'refs/remotes/'
    REFS_TAGS = 'refs/tags/'

    # Name pattern truths
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    MODE_DIRECTORY = 0o40000  # Used to tell if a tree entry is a directory

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap a dulwich repository, another Gittle, or a repository path.

        ``origin_uri`` is remembered as the default remote, ``report_activity``
        as the activity callback.  When ``auth`` is falsy an authenticator is
        built from the remaining ``*args``/``**kwargs`` via ``self.auth``.

        Raises Exception when ``repo_or_path`` is none of the accepted types.
        """
        if isinstance(repo_or_path, DulwichRepo):
            repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            repo = DulwichRepo(os.path.abspath(repo_or_path))
        else:
            logging.warning('Repo is of type %s' % type(repo_or_path))
            raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path')
        self.repo = repo

        # Set path
        self.path = self.repo.path

        # The remote url
        self.origin_uri = origin_uri

        # Report client activity
        self._report_activity = report_activity

        # Build ignore filter: the always-hidden patterns plus .gitignore
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Get authenticator
        if not auth:
            self.auth(*args, **kwargs)
        else:
            self.authenticator = auth

    def report_activity(self, *args, **kwargs):
        """Forward the call to the activity callback, if one was configured."""
        if self._report_activity:
            return self._report_activity(*args, **kwargs)
        return None

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        name = userinfo.get('name')
        email = userinfo.get('email')
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return ''.join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """SHA of the current HEAD, as reported by the wrapped repository."""
        repo = self.repo
        return repo.head()

    @property
    def is_bare(self):
        """Whether the wrapped repository is bare.

        Bare repositories have no working directories or indexes.
        """
        repo = self.repo
        return repo.bare

    @property
    def is_working(self):
        """Negation of ``is_bare``."""
        bare = self.is_bare
        return not bare

    def has_index(self):
        """Whether the wrapped repository has an index (opposite of is_bare)."""
        repo = self.repo
        return repo.has_index()

    @property
    def has_commits(self):
        """
        True when HEAD resolves; a repository without HEAD is considered
        to have no commits.
        """
        try:
            self.repo.head()
        except KeyError:
            found = False
        else:
            found = True
        return found

    def ref_walker(self, ref=None):
        """
        Yield the commits reached by walking from ``ref`` (HEAD by default).
        """
        start_sha = self._commit_sha(ref or 'HEAD')
        walker = self.repo.get_walker(start_sha)
        for walk_entry in walker:
            yield walk_entry.commit

    def branch_walker(self, branch):
        """Walk the commits of ``branch`` (the active branch when falsy)."""
        branch_name = branch or self.active_branch
        return self.ref_walker(self._format_ref_branch(branch_name))

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list with the info of each commit on ``branch``.

        :param start: index of the first commit to return
        :param end: index one past the last commit to return; a falsy value
            (None or 0) means "no upper bound"
        :param branch: branch to walk; passed on to ``branch_walker``
        :returns: a list (empty when the repository has no commits)
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        # ``start`` used to be ignored whenever ``end`` was falsy; slice in
        # every case, mapping a falsy ``end`` to an open upper bound.
        return commits[start:end or None]


    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        """Pluck the ``author`` of the first ``n`` commits (10 when falsy)."""
        limit = n or 10
        infos = self.commit_info(end=limit, branch=branch)
        return funky.pluck(infos, 'author')

    @property
    def commit_count(self):
        """Number of commits yielded by ``ref_walker()``; 0 on KeyError."""
        try:
            # ref_walker() is a generator and has no len(): count by
            # consuming it.  A KeyError raised while walking is caught here.
            return sum(1 for _ in self.ref_walker())
        except KeyError:
            return 0

    def commits(self):
        """Return the list of SHAs of the commits reported by ``commit_info``.
        """
        shas = []
        for info in self.commit_info():
            shas.append(info['sha'])
        return shas

    @property
    def git_dir(self):
        """Control directory of the wrapped repository."""
        repo = self.repo
        return repo.controldir()

    def auth(self, *args, **kwargs):
        """Create a ``GittleAuth`` from the arguments, store and return it."""
        authenticator = GittleAuth(*args, **kwargs)
        self.authenticator = authenticator
        return self.authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.active_branch
        refs_key = self._format_ref_branch(branch_name)
        sha = self.branches[branch_name]

        def wants_func(old):
            refs_key = self._format_ref_branch(branch_name)
            return {
                refs_key: sha
            }
        return wants_func

    def _get_ignore_regexes(self):
        gitignore_filename = os.path.join(self.path, '.gitignore')
        if not os.path.exists(gitignore_filename):
            return []
        lines = open(gitignore_filename).readlines()
        globers = map(lambda line: line.rstrip(), lines)
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        """Absolute path of ``repo_file``, resolved against the repository path."""
        joined = os.path.join(self.path, repo_file)
        return os.path.abspath(joined)

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        """Path of ``abspath`` relative to the repository path."""
        base = self.path
        return os.path.relpath(abspath, base)

    @property
    def last_commit(self):
        """Item of ``self`` looked up under the repository's HEAD SHA."""
        head_sha = self.repo.head()
        return self[head_sha]

    @property
    def index(self):
        """Freshly opened index of the wrapped repository."""
        repo = self.repo
        return repo.open_index()

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository at ``path`` and wrap it.

        A bare repository is created when ``bare`` is truthy.  Remaining
        arguments are passed on to the class constructor.
        """
        mkdir_safe(path)

        # Pick the dulwich constructor matching the requested kind
        constructor = DulwichRepo.init_bare if bare else DulwichRepo.init

        # Create the dulwich repo, then the wrapper around it
        dulwich_repo = constructor(path)
        return cls(dulwich_repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        """Same as ``init`` but ``bare`` defaults to True."""
        if 'bare' not in kwargs:
            kwargs['bare'] = True
        return cls.init(*args, **kwargs)

    @classmethod
    def is_repo(cls, path):
        """Returns True if path is a git repository, False if it is not"""
        try:
            # Only the success or failure of the construction matters.
            Gittle(path)
        except NotGitRepository:
            return False
        return True

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for the remote URL.

        The URL is ``origin_uri`` or, failing that, ``self.origin_uri``;
        InvalidRemoteUrl is raised when neither is set.  Client options are
        the authenticator's kwargs, overridden by ``kwargs``, with
        ``report_activity`` always set to this instance's method.
        """
        # Get the remote URL, fail if there is none
        uri = origin_uri or self.origin_uri
        if not uri:
            raise InvalidRemoteUrl()

        client_kwargs = {}
        for source in (self.authenticator.kwargs(), kwargs):
            client_kwargs.update(source)
        client_kwargs['report_activity'] = self.report_activity

        client, remote_path = get_transport_and_path(uri, **client_kwargs)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None):
        """Send the pack for ``branch_name`` to ``origin_uri``.

        Returns the result of the client's ``send_pack``.
        """
        wants = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri)
        pack_generator = self.repo.object_store.generate_pack_contents
        return client.send_pack(remote_path, wants, pack_generator,
                                progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None):
        """Like ``git push``: delegate to ``push_to`` with the same arguments."""
        result = self.push_to(origin_uri, branch_name, progress)
        return result

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Delete the repository directory, re-initialise it and pull again.

        Destructive: everything under ``self.path`` is removed first.
        """
        # Remove all previously existing data
        repo_path = self.path
        rmtree(repo_path)
        mkdir_safe(repo_path)
        self.repo = DulwichRepo.init(repo_path)

        # Fetch brand new copy from remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        """Fetch from ``origin_uri``; ``branch_name`` is accepted but unused."""
        fetched = self.fetch(origin_uri)
        return fetched

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        """Like ``git pull``: delegate to ``pull_from`` with the same arguments."""
        result = self.pull_from(origin_uri, branch_name)
        return result

    def fetch_remote(self, origin_uri=None):
        """Fetch from the remote into the wrapped repository.

        Returns what the client's ``fetch`` returns (the remote refs).
        """
        client, remote_path = self.get_client(origin_uri=origin_uri)
        return client.fetch(remote_path, self.repo)


    def _setup_fetched_refs(self, refs, origin, bare):
        """Record fetched refs locally.

        Heads are imported under ``refs/remotes/<origin>`` (``refs/heads``
        for a bare repository), tags under ``refs/tags``, and every cleaned
        ref is then assigned on ``self`` by key.
        """
        tag_refs = utils.git.subrefs(refs, 'refs/tags')
        head_refs = utils.git.subrefs(refs, 'refs/heads')

        # Filter refs
        cleaned_tags = utils.git.clean_refs(tag_refs)
        cleaned_heads = utils.git.clean_refs(head_refs)

        # Base of new refs
        heads_base = 'refs/remotes/' + origin
        if bare:
            heads_base = 'refs/heads'

        # Import branches, then tags
        self.import_refs(heads_base, cleaned_heads)
        self.import_refs('refs/tags', cleaned_tags)

        # Update HEAD
        for ref_name, sha in utils.git.clean_refs(refs).items():
            self[ref_name] = sha


    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote and update local refs.

        Returns None.  Nothing is updated when the remote reports no refs.
        For a non-bare repository that has commits the working directory is
        checked out; otherwise the server info is updated.
        """
        bare = bare or False
        origin = origin or self.DEFAULT_REMOTE

        remote_refs = self.fetch_remote(origin_uri)

        # If no refs (empty repository)
        if not remote_refs:
            return

        # Update refs (branches, tags, HEAD)
        self._setup_fetched_refs(remote_refs, origin, bare)

        if bare or not self.has_commits:
            self.update_server_info()
        else:
            # Checkout working directories
            self.checkout_all()


    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository into ``local_path`` and return the wrapper.

        The local repository is initialised (bare when ``bare`` is truthy),
        fetched into, and given an ``origin`` remote pointing at
        ``origin_uri``.  ``mkdir`` is accepted but not consulted: the
        directory is always created.
        """
        mkdir_safe(local_path)

        # Initialize the local repository
        initializer = cls.init_bare if bare else cls.init
        local_repo = initializer(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)
        repo.fetch(bare=bare)

        # Add origin
        repo.add_remote('origin', origin_uri)
        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as ``clone`` but ``bare`` defaults to True.
        """
        if 'bare' not in kwargs:
            kwargs['bare'] = True
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):

        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.info("STAGING : %s" % modified_files)
            self.repo.stage(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message,
            author=author_msg,
            committer=committer_msg,
            encoding='UTF-8',
            tree=tree,
            *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat tree built from ``structure`` and return its id.

        ``structure`` is an iterable of mappings with ``data``, ``name`` and
        ``mode`` entries.  ``data`` and ``name`` must be ASCII-encodable;
        entries that cannot be read or encoded are skipped.  One blob per
        entry and the resulting tree are added to the object store.
        """
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:

            # str only
            try:
                data = file_info['data'].encode('ascii')
                name = file_info['name'].encode('ascii')
                mode = file_info['mode']
            except Exception:
                # Skip file on encoding errors.  ``Exception`` rather than a
                # bare ``except`` so KeyboardInterrupt/SystemExit propagate.
                continue

            blob = Blob()
            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    # Like: git commmit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        """Commit ``files`` with ``name``/``email`` as author and committer.

        Delegates to ``_commit``; extra arguments are passed through.
        """
        user_info = dict(name=name, email=email)
        return self._commit(
            committer=user_info,
            author=user_info,
            message=message,
            files=files,
            *args,
            **kwargs
        )

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Commit a tree built from ``structure`` without staging files.

        Main use is committing directly to bare repositories, e.g. an
        initial commit so the repo can be cloned right away.  Returns
        ``None`` when ``structure`` is empty, otherwise the result of
        ``self._commit``.
        """
        if structure:
            identity = dict(name=name, email=email)
            return self._commit(
                committer=identity,
                author=identity,
                message=message,
                tree=self._tree_from_structure(structure),
                *args,
                **kwargs
            )
        return None

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        """Push to ``origin_uri`` first, then pull from it and return the pull result."""
        self.push(origin_uri)
        pulled = self.pull(origin_uri)
        return pulled

    def lookup_entry(self, relpath, trackable_files=frozenset()):
        """Return ``(hex_sha, st_mode)`` for a trackable working-tree file.

        The sha is the SHA-1 of the file content prefixed with a
        ``"blob <size>\\0"`` header.

        Raises:
            KeyError: if ``relpath`` is not in ``trackable_files``.
        """
        if relpath not in trackable_files:
            raise KeyError(relpath)

        abspath = self.abspath(relpath)

        with open(abspath, 'rb') as git_file:
            data = git_file.read()

        s = sha1()
        # hashlib only accepts bytes on Python 3, so encode the header.
        s.update(("blob %u\0" % len(data)).encode('ascii'))
        s.update(data)
        return (s.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        """Collect every entry yielded by iterating ``self.index``."""
        entries = []
        entries.extend(self.index)
        return entries

    @property
    @funky.transform(set)
    def raw_files(self):
        """Return ``utils.paths.subpaths`` of ``self.path`` with no filters."""
        list_subpaths = utils.paths.subpaths
        return list_subpaths(self.path)

    @property
    @funky.transform(set)
    def ignored_files(self):
        """Return ``utils.paths.subpaths`` of ``self.path`` restricted by ``self.filters``."""
        list_subpaths = utils.paths.subpaths
        return list_subpaths(self.path, filters=self.filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        """Return the raw files minus the ignored ones."""
        everything = self.raw_files
        ignored = self.ignored_files
        return everything - ignored

    @property
    @funky.transform(set)
    def untracked_files(self):
        """Return the trackable files that are not in ``self.tracked_files``."""
        trackable = self.trackable_files
        tracked = self.tracked_files
        return trackable - tracked

    """
    @property
    @funky.transform(set)
    def modified_staged_files(self):
        "Checks if the file has changed since last commit"
        timestamp = self.last_commit.commit_time
        index = self.index
        return [
            f
            for f in self.tracked_files
            if index[f][1][0] > timestamp
        ]
    """

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        obj_sto = self.repo.object_store
        tree_id = self[ref].tree
        names = self.trackable_files

        lookup_func = partial(self.lookup_entry, trackable_files=names)

        # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False)
        return list(tree_diff)

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        """Return the paths of changed entries whose name pair matches ``pattern``.

        ``pattern`` is compared with ``tuple(map(bool, names))`` for each
        entry of ``self._changed_entries()``.  For ``self.PATTERN_MODIFIED``
        an entry additionally has to have a different sha on each side.
        """
        check_sha = (self.PATTERN_MODIFIED == pattern)
        filtered_paths = []
        for names, modes, sha in self._changed_entries():
            if tuple(map(bool, names)) != pattern:
                continue
            path = funky.first_true(names)
            if not path:
                continue
            # A file is only modified when its content hash changed; the
            # previous ``sha[0] == sha[1]`` test kept exactly the entries
            # whose content was identical (e.g. mode-only differences).
            if check_sha and sha[0] == sha[1]:
                continue
            filtered_paths.append(path)
        return filtered_paths

    @property
    @funky.transform(set)
    def removed_files(self):
        """Return entries matching ``PATTERN_REMOVED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_REMOVED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        """Return entries matching ``PATTERN_ADDED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_ADDED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        """Return entries matching ``PATTERN_MODIFIED``, minus ignored files."""
        matched = self._changed_entries_by_pattern(self.PATTERN_MODIFIED)
        return matched - self.ignored_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        """Return tracked files whose mtime is later than the last commit time."""
        cutoff = self.last_commit.commit_time
        touched = []
        for tracked in self.tracked_files:
            if os.stat(self.abspath(tracked)).st_mtime > cutoff:
                touched.append(tracked)
        return touched

    @property
    def pending_files(self):
        """Return the union of the modified, added and removed files."""
        pending = self.modified_files | self.added_files
        return pending | self.removed_files

    @property
    def pending_files_by_state(self):
        """Map each pending path to ``'modified'``, ``'added'`` or ``'removed'``."""
        by_state = {
            'modified': self.modified_files,
            'added': self.added_files,
            'removed': self.removed_files
        }

        # Invert state -> paths into path -> state
        flipped = {}
        for state, paths in by_state.items():
            for path in paths:
                flipped[path] = state
        return flipped

    """
    @property
    @funky.transform(set)
    def modified_files(self):
        return self.modified_staged_files | self.modified_unstaged_files
    """

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        """Stage ``files`` via ``self.repo.stage`` and return its result."""
        repo = self.repo
        return repo.stage(files)

    def add(self, *args, **kwargs):
        """Alias of ``stage``; forwards all arguments."""
        staged = self.stage(*args, **kwargs)
        return staged

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Remove ``files`` from the index and write it; unknown paths are ignored.

        ``force`` is accepted but not used.  Returns the result of the
        index ``write()``.
        """
        # ``self.index`` is rebuilt on every access (see ``rm_all``), so the
        # deletions must go through the same cached object that is written.
        index = self.index
        for f in [f for f in files if f in index]:
            del index[f]
        return index.write()

    def mv_fs(self, file_pair):
        """Rename the first path of ``file_pair`` to the second on disk."""
        source, destination = file_pair
        os.rename(source, destination)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Rename ``(old, new)`` pairs on disk and update the index.

        Only pairs whose old name is in the index are processed.  Files are
        renamed first, then the new names are added, the old names removed
        and finally the old names are passed to ``add`` again.
        """
        index = self.index
        # Materialise the selection: a lazy ``filter``/``map`` is consumed
        # once and never runs its side effects on Python 3.
        files_in_index = [pair for pair in files_pair if pair[0] in index]
        for pair in files_in_index:
            self.mv_fs(pair)
        old_files = [old_name for old_name, _new_name in files_in_index]
        new_files = [new_name for _old_name, new_name in files_in_index]
        self.add(new_files)
        self.rm(old_files)
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        """Call ``build_index_from_tree`` for ``tree`` and return its result.

        The repo path, index path and object store are taken from ``self.repo``.
        """
        repo = self.repo
        return build_index_from_tree(repo.path, repo.index_path(), repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        """Check out the tree of ``commit_sha`` (default: ``self.head``)."""
        target = commit_sha or self.head
        # Rebuild index from the commit's tree
        return self._checkout_tree(self._commit_tree(target))

    def checkout(self, ref):
        """Point HEAD at ``ref`` symbolically, then check out its tree."""
        self.repo.refs.set_symbolic_ref('HEAD', ref)
        # Rebuild index from the ref's tree
        return self._checkout_tree(self._commit_tree(ref))

    @funky.arglist_method
    def reset(self, files, commit='HEAD'):
        """Placeholder: not implemented, does nothing and returns ``None``."""

    def rm_all(self):
        """Empty the index and write it; returns the result of ``write()``."""
        # The ``index`` property builds a new object on every access, so
        # hold on to one instance: clearing and writing must hit the same
        # object for the clear to have any effect.
        cached_index = self.index
        cached_index.clear()
        return cached_index.write()

    def _to_commit(self, commit_obj):
        """Return ``self.repo[commit_obj]`` for a string, else ``commit_obj`` itself.

        Lets callers pass either a SHA or a dulwich Commit object.
        """
        if not isinstance(commit_obj, basestring):
            return commit_obj
        return self.repo[commit_obj]

    def _commit_sha(self, commit_obj):
        """Return the SHA for a SHA, a reference string or a commit object.

        SHAs are returned unchanged, other strings are resolved through
        ``dwim_reference``, and anything else must expose ``.id``.
        """
        if utils.git.is_sha(commit_obj):
            return commit_obj
        if isinstance(commit_obj, basestring):
            # Can't use self[commit_obj] to avoid infinite recursion
            resolved = self.dwim_reference(commit_obj)
            commit_obj = self.repo[resolved]
        return commit_obj.id

    def dwim_reference(self, ref):
        """Dwim resolves a short reference to a full reference.

        Returns the first candidate that is contained in ``self.repo``.
        Candidates that fail to format or to look up are skipped.

        Raises:
            Exception: if no candidate is found in the repository.
        """

        # Formats of refs we want to try in order
        formats = [
            "%s",
            "refs/%s",
            "refs/tags/%s",
            "refs/heads/%s",
            "refs/remotes/%s",
            "refs/remotes/%s/HEAD",
        ]

        for f in formats:
            try:
                fullref = f % ref
                if fullref in self.repo:
                    return fullref
            except Exception:
                # Best effort: try the next format.  ``Exception`` keeps
                # KeyboardInterrupt/SystemExit propagating, unlike the
                # bare ``except`` this replaces.
                continue

        raise Exception("Could not resolve ref")

    def blob_data(self, sha):
        """Return the ``data`` attribute of the object looked up as ``self[sha]``."""
        blob = self[sha]
        return blob.data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """Follow first parents ``n`` times (default 1) and return the SHA reached.

        Stops early at a commit without parents.
        Warning: Remember that parents aren't the previous commits.
        """
        remaining = 1 if n is None else n
        current = commit
        while True:
            current = self._to_commit(current)
            parents = current.parents
            if remaining <= 0 or not parents:
                # Return a SHA
                return self._commit_sha(current)
            # Step to the first parent and count it
            current = self[parents[0]]
            remaining -= 1

    def get_previous_commit(self, commit_ref, n=None):
        """Look up the commit ``n`` (default 1) positions after ``commit_ref`` in ``self.commits()``.

        The lookup is done by ``funky.next`` with the resolved SHA itself as
        the default.
        """
        start_sha = self._parse_reference(commit_ref)
        steps = n or 1
        history = self.commits()
        return funky.next(history, start_sha, n=steps, default=start_sha)

    def _parse_reference(self, ref_string):
        # COMMIT_REF~x
        if '~' in ref_string:
            ref, count = ref_string.split('~')
            count = int(count)
            commit_sha = self._commit_sha(ref)
            return self.get_previous_commit(commit_sha, count)
        return self._commit_sha(ref_string)

    def _commit_tree(self, commit_sha):
        """Return the tree object for a given commit
        """
        return self[commit_sha].tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff ``commit_sha`` against ``compare_to``.

        ``compare_to`` defaults to ``self.get_previous_commit(commit_sha)``
        and ``diff_type`` to ``self.DEFAULT_DIFF_TYPE``; the diff function is
        looked up in ``self.DIFF_FUNCTIONS``.  ``filter_binary`` is passed
        on to ``_diff_between``.
        """
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary: it used to be dropped here, so callers
        # could never turn binary filtering off.
        return self._diff_between(
            compare_to,
            commit_sha,
            diff_function=diff_func,
            filter_binary=filter_binary
        )

    def diff_working(self, ref=None, filter_binary=True):
        """Diff the working directory against ``ref`` (``_changed_entries`` default when ``None``)."""
        store = self.repo.object_store
        root = self.path
        entries = self._changed_entries(ref=ref)
        return utils.git.diff_changes_paths(store, root, entries, filter_binary=filter_binary)

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Collect the files of a commit (or tree) into a dict keyed by path.

        Format:
            {
                "directory/filename.txt": {
                    'name': 'filename.txt',
                    'path': "directory/filename.txt",
                    'sha': "xxxxxxxxxxxxxxxxxxxx",
                    'data': "blablabla",
                    'mode': 0xxxxx,
                },
                ...
            }

        ``commit_sha`` is treated as a tree SHA when ``is_tree`` is true.
        Directories are descended recursively with ``parent_path`` as the
        path prefix.  When ``paths`` is given, only files whose full path
        is in ``paths`` are included.
        """
        collected = {}
        prefix = parent_path or ''

        tree_sha = commit_sha if is_tree else self._commit_tree(commit_sha)
        tree = self[tree_sha]

        for entry in tree.items():
            full_path = os.path.join(prefix, entry.path)

            if entry.mode == self.MODE_DIRECTORY:
                # Directory: merge in everything found below it
                collected.update(
                    self.get_commit_files(entry.sha, parent_path=full_path, is_tree=True, paths=paths)
                )
            elif paths is None or full_path in paths:
                # File we were asked for
                collected[full_path] = {
                    'name': entry.path,
                    'path': full_path,
                    'mode': entry.mode,
                    'sha': entry.sha,
                    'data': self.blob_data(entry.sha),
                }
        return collected

    def file_versions(self, path):
        """Return the distinct versions of ``path`` across ``self.commit_info()``.

        For every commit containing the file with a blob SHA not seen
        before, the file info dict is appended to the result and also
        stored on the commit dict under ``'file'``.
        """
        versions = []
        commits_info = self.commit_info()
        seen_shas = set()

        for commit in commits_info:
            try:
                files = self.get_commit_files(commit['sha'], paths=[path])
                # list(...) keeps this working on Python 3, where
                # dict.items() is a view that cannot be indexed.
                file_path, file_data = list(files.items())[0]
            except IndexError:
                # File is not part of this commit
                continue

            file_sha = file_data['sha']

            if file_sha in seen_shas:
                continue
            seen_shas.add(file_sha)

            # Add file info
            commit['file'] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits
            Please use .diff method unless you have very specific needs
        """

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """Run ``diff`` with ``diff_type`` forced to ``'changes'``.

        Returns a list of lists of tuples:
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        kwargs.update(diff_type='changes')
        return self.diff(*args, **kwargs)

    def changes_count(self, *args, **kwargs):
        """Return the number of entries reported by ``changes``."""
        found = self.changes(*args, **kwargs)
        return len(found)

    def _refs_by_pattern(self, pattern):
        refs = self.refs

        def item_filter(key_value):
            """Filter only concered refs"""
            key, value = key_value
            return key.startswith(pattern)

        def item_map(key_value):
            """Rewrite keys"""
            key, value = key_value
            new_key = key[len(pattern):]
            return (new_key, value)

        return dict(
            map(item_map,
                filter(
                    item_filter,
                    refs.items()
                )
            )
        )

    @property
    def refs(self):
        """Return ``self.repo.get_refs()``."""
        repo = self.repo
        return repo.get_refs()

    def set_refs(self, refs_dict):
        """Assign every ``key -> value`` pair of ``refs_dict`` on ``self.repo``.

        The ``self`` parameter was missing, so the method could not be
        called on an instance.
        """
        for k, v in refs_dict.items():
            self.repo[k] = v

    def import_refs(self, base, other):
        """Forward to ``self.repo.refs.import_refs(base, other)``."""
        refs_container = self.repo.refs
        return refs_container.import_refs(base, other)

    @property
    def branches(self):
        """Return the refs under ``self.REFS_BRANCHES`` keyed by short name."""
        prefix = self.REFS_BRANCHES
        return self._refs_by_pattern(prefix)

    @property
    def active_branch(self):
        """Return the name of the active branch, or ``None`` if HEAD is detached.

        ``None`` is also returned when HEAD is a symref that does not point
        below ``self.REFS_BRANCHES``.
        """
        head = self.repo.refs.read_ref('HEAD')
        if not head.startswith(SYMREF):
            return None

        target = head[len(SYMREF):]
        branch_prefix = self.REFS_BRANCHES
        if not target.startswith(branch_prefix):
            return None
        return target[len(branch_prefix):]

    @property
    def active_sha(self):
        """Deprecated equivalent of the ``head`` property."""
        current_head = self.head
        return current_head

    @property
    def remote_branches(self):
        """Return the refs under ``self.REFS_REMOTES`` keyed by short name."""
        prefix = self.REFS_REMOTES
        return self._refs_by_pattern(prefix)

    @property
    def tags(self):
        """Return the refs under ``self.REFS_TAGS`` keyed by short name."""
        prefix = self.REFS_TAGS
        return self._refs_by_pattern(prefix)

    @property
    def remotes(self):
        """Map remote names to URLs, read from the repo config.

        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        """
        urls = {}
        for section, options in self.repo.get_config().items():
            # Only ('remote', <name>) sections describe remotes
            if section[0] == 'remote':
                urls[section[1]] = options['url']
        return urls

    def add_remote(self, remote_name, remote_url):
        """Write ``url`` and ``fetch`` entries for a remote to the repo config.

        Returns ``remote_name``.
        """
        config = self.repo.get_config()
        section = ('remote', remote_name)
        fetch_spec = "+refs/heads/*:refs/remotes/%s/*" % remote_name

        config.set(section, 'url', remote_url)
        config.set(section, 'fetch', fetch_spec)

        # Persist the change
        config.write_to_path()

        return remote_name

    def add_ref(self, new_ref, old_ref):
        """Set ``self.repo.refs[new_ref]`` to ``old_ref``, then call ``update_server_info``."""
        refs_container = self.repo.refs
        refs_container[new_ref] = old_ref
        self.update_server_info()

    def remove_ref(self, ref_name):
        """Delete ``ref_name``; return ``False`` if it does not exist, else ``True``."""
        refs_container = self.repo.refs
        if ref_name in refs_container:
            del self.repo.refs[ref_name]
            self.update_server_info()
            return True
        return False

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Create ``new_branch`` from ``base_branch`` and return the new ref name.

        A local ``base_branch`` is preferred; otherwise the branch of that
        name on the ``tracking`` remote (default ``self.DEFAULT_REMOTE``)
        is used.

        Raises:
            Exception: if ``new_branch`` already exists or ``base_branch``
                is found neither locally nor on the remote.
        """

        # The remote to track: honour the argument instead of always
        # overwriting it with the default.
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Ref names always use "/", whatever the OS path separator is
        remote_branch = "/".join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking))

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def create_orphan_branch(self, new_branch, empty_index=None):
        """Create a new branch with no commits in it.

        Technically, just points HEAD to a non-existent branch.  The actual
        branch will only be created if something is committed.  This is
        equivalent to:

            git checkout --orphan <new_branch>

        In a working repository ``clean_working`` is always called; with
        ``empty_index`` set the index is emptied as well, which against a
        clean working tree is equivalent to:

            git checkout --orphan <new_branch>
            git reset --merge

        Raises ``Exception`` if the branch already exists; returns the new
        ref name.
        """
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        orphan_ref = self._format_ref_branch(new_branch)
        self.repo.refs.set_symbolic_ref('HEAD', orphan_ref)

        if not self.is_working:
            return orphan_ref

        if empty_index:
            self.rm_all()
        self.clean_working()
        return orphan_ref

    def remove_branch(self, branch_name):
        """Remove the ref of ``branch_name`` and return ``remove_ref``'s result."""
        return self.remove_ref(self._format_ref_branch(branch_name))

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Change the current branch to ``branch_name``.

        A missing branch is created via ``create_branch`` unless ``create``
        is false (``None`` means ``True``).  HEAD is pointed at the branch
        and, in a working repository, the working files are cleaned and
        checked out again.

        Raises:
            Exception: if the branch does not exist and ``create`` is false.
        """
        if create is None:
            create = True

        # Check if branch exists
        if branch_name not in self.branches:
            # Honour the ``create`` flag, which used to be ignored
            if not create:
                raise Exception("branch %s does not exist" % branch_name)
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref('HEAD', branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def create_tag(self, tag_name, target):
        """Add a tag ref for ``tag_name`` pointing at the SHA parsed from ``target``."""
        tag_ref = self._format_ref_tag(tag_name)
        target_sha = self._parse_reference(target)
        return self.add_ref(tag_ref, target_sha)

    def remove_tag(self, tag_name):
        """Remove the ref of ``tag_name`` and return ``remove_ref``'s result."""
        return self.remove_ref(self._format_ref_tag(tag_name))

    def clean(self, force=None, directories=None):
        """Delete every untracked file and return the collection of removed paths.

        ``force`` and ``directories`` are accepted but not used.
        """
        untracked_files = self.untracked_files
        # Explicit loop: ``map`` is lazy on Python 3 and would delete nothing.
        for path in untracked_files:
            os.remove(path)
        return untracked_files

    def clean_working(self):
        """Purge the working directory by delegating to ``clean``.

        Used for clean branch switching.
        """
        removed = self.clean()
        return removed

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        tree = self[tree_sha]
        structure = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return structure
        for entry in tree.items():
            # tree
            if entry.mode == self.MODE_DIRECTORY:
                # Recur
                structure[entry.path] = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha)
            # commit
            else:
                structure[entry.path] = entry.sha
        structure['.'] = tree_sha
        structure['..'] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        """Return the part of the tree structure found under ``path``.

        The structure is built one level deeper than ``path`` has components
        and then narrowed with ``funky.subkey``.
        """
        components = path.split(os.path.sep)
        structure = self._get_fs_structure(tree_sha, depth=len(components) + 1)
        return funky.subkey(structure, components)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" of the commit ``ref`` using that commit's tree.

        An empty ``subpath`` or one in ``self.ROOT_PATHS`` lists the root,
        one level deep.
        """
        tree_sha = self._commit_tree(ref)

        is_root = subpath in self.ROOT_PATHS or not subpath
        if not is_root:
            return self._get_fs_structure_by_path(tree_sha, subpath)
        return self._get_fs_structure(tree_sha, depth=1)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit.

        Raises:
            IndexError: if ``path`` is not part of the commit.
        """
        # list(...) keeps this working on Python 3, where dict.items() is a
        # view that cannot be indexed; an empty result still raises IndexError.
        name, info = list(self.get_commit_files(ref, paths=[path]).items())[0]
        return info

    def commit_tree(self, ref, *args, **kwargs):
        """Return the fs structure of the tree of commit ``ref``.

        Extra arguments are forwarded to ``_get_fs_structure``.
        """
        return self._get_fs_structure(self._commit_tree(ref), *args, **kwargs)

    def update_server_info(self):
        """Call the module-level ``update_server_info`` on the repo, for bare repositories only."""
        if self.is_bare:
            update_server_info(self.repo)

    def _is_fast_forward(self):
        pass

    def _merge_fast_forward(self):
        pass

    def __hash__(self):
        """This is required otherwise the memoize function will just mess it up
        """
        return hash(self.path)

    def __getitem__(self, key):
        try:
            sha = self._parse_reference(key)
        except:
            raise KeyError(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        try:
            key = self.dwim_reference(key)
        except:
            pass
        self.repo[key] = value

    def __contains__(self, key):
        try:
            key = self.dwim_reference(key)
        except:
            pass
        return key in self.repo

    def __delitem__(self, key):
        try:
            key = self.dwim_reference(key)
        except:
            raise KeyError(key)
        self.remove_ref(key)


    # Public aliases: each name below is bound to the same attribute as the
    # name on its right-hand side.
    # fork -> clone_bare
    fork = clone_bare
    # log -> commit_info
    log = commit_info
    # diff_count -> changes_count
    diff_count = changes_count
    # contributors -> recent_contributors
    contributors = recent_contributors
Exemple #17
0
class DulwichBackend(BaseGitBackend):  # pylint:disable=abstract-method
    """Dulwich Git backend."""

    # Dulwich progress will return messages equivalent to git CLI,
    # our pbars should just display the messages as formatted by dulwich
    BAR_FMT_NOTOTAL = "{desc}{bar:b}|{postfix[info]} [{elapsed}]"

    def __init__(  # pylint:disable=W0231
            self,
            root_dir=os.curdir,
            search_parent_directories=True):
        """Open the Git repository for ``root_dir``.

        With ``search_parent_directories`` the repository is located with
        ``Repo.discover`` starting at ``root_dir``; otherwise ``root_dir``
        itself is opened.  Raises ``SCMError`` when dulwich reports
        ``NotGitRepository``.
        """
        from dulwich.errors import NotGitRepository
        from dulwich.repo import Repo

        try:
            self.repo = (Repo.discover(start=root_dir)
                         if search_parent_directories else Repo(root_dir))
        except NotGitRepository as exc:
            raise SCMError(f"{root_dir} is not a git repository") from exc

        # Submodule name -> path, read once at construction time
        self._submodules: Dict[str, "PathInfo"] = self._find_submodules()
        # Stash objects created by _get_stash, keyed by ref
        self._stashes: dict = {}

    def _find_submodules(self) -> Dict[str, "PathInfo"]:
        """Return dict mapping submodule names to submodule paths.

        Read from ``.gitmodules`` in the repo root; empty when that file
        does not exist.  Submodule paths will be relative to Git repo root.
        """
        from dulwich.config import ConfigFile, parse_submodules

        gitmodules = os.path.join(self.root_dir, ".gitmodules")
        if not os.path.isfile(gitmodules):
            return {}

        parsed = parse_submodules(ConfigFile.from_path(gitmodules))
        return {
            os.fsdecode(section): PathInfo(os.fsdecode(path))
            for path, _url, section in parsed
        }

    def close(self):
        """Close the underlying dulwich repository."""
        repo = self.repo
        repo.close()

    @property
    def root_dir(self) -> str:
        """Return ``self.repo.path``."""
        repo = self.repo
        return repo.path

    @staticmethod
    def clone(
        url: str,
        to_path: str,
        rev: Optional[str] = None,
        shallow_branch: Optional[str] = None,
    ):
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    @property
    def dir(self) -> str:
        """Return ``self.repo.commondir()``."""
        repo = self.repo
        return repo.commondir()

    def add(self, paths: Union[str, Iterable[str]], update=False):
        """Stage ``paths`` in the index via ``self.repo.stage``.

        A single string is treated as a one-element list.  Directories are
        expanded with ``walk_files``; every staged path is made relative to
        ``self.root_dir`` and encoded with ``os.fsencode``.  With
        ``update=True`` and no paths, every path already in the index is
        re-staged; with ``update=True`` and paths, only those of the given
        paths that are already in the index are staged.
        """
        from dvc.utils.fs import walk_files

        # At least one of the two must be given
        assert paths or update

        if isinstance(paths, str):
            paths = [paths]

        if update and not paths:
            # Nothing named explicitly: re-stage everything in the index
            self.repo.stage(list(self.repo.open_index()))
            return

        files: List[bytes] = []
        for path in paths:
            if not os.path.isabs(path) and self._submodules:
                # NOTE: If path is inside a submodule, Dulwich expects the
                # staged paths to be relative to the submodule root (not the
                # parent git repo root). We append path to root_dir here so
                # that the result of relpath(path, root_dir) is actually the
                # path relative to the submodule root.
                path_info = PathInfo(path).relative_to(self.root_dir)
                for sm_path in self._submodules.values():
                    if path_info.isin(sm_path):
                        path = os.path.join(self.root_dir,
                                            path_info.relative_to(sm_path))
                        break
            if os.path.isdir(path):
                # Directory: stage every file found below it
                files.extend(
                    os.fsencode(relpath(fpath, self.root_dir))
                    for fpath in walk_files(path))
            else:
                files.append(os.fsencode(relpath(path, self.root_dir)))

        # NOTE: this doesn't check gitignore, same as GitPythonBackend.add
        if update:
            index = self.repo.open_index()
            if os.name == "nt":
                # NOTE: we need git/unix separator to compare against index
                # paths but repo.stage() expects to be called with OS paths
                self.repo.stage([
                    fname for fname in files
                    if fname.replace(b"\\", b"/") in index
                ])
            else:
                self.repo.stage([fname for fname in files if fname in index])
        else:
            self.repo.stage(files)

    def commit(self, msg: str, no_verify: bool = False):
        """Commit in ``self.root_dir`` with message ``msg`` via dulwich porcelain.

        ``CommitError`` and ``InvalidUserIdentity`` are re-raised as
        ``SCMError``.
        """
        from dulwich.errors import CommitError
        from dulwich.porcelain import commit as porcelain_commit
        from dulwich.repo import InvalidUserIdentity

        try:
            porcelain_commit(self.root_dir, message=msg, no_verify=no_verify)
        except CommitError as exc:
            raise SCMError("Git commit failed") from exc
        except InvalidUserIdentity as exc:
            raise SCMError(
                "Git username and email must be configured") from exc

    def checkout(
        self,
        branch: str,
        create_new: Optional[bool] = False,
        force: bool = False,
        **kwargs,
    ):
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def pull(self, **kwargs):
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def push(self):
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def branch(self, branch: str):
        """Create ``branch`` in ``self.root_dir`` via dulwich porcelain.

        A porcelain ``Error`` is re-raised as ``SCMError``.
        """
        from dulwich.porcelain import Error as PorcelainError
        from dulwich.porcelain import branch_create

        try:
            branch_create(self.root_dir, branch)
        except PorcelainError as exc:
            raise SCMError(f"Failed to create branch '{branch}'") from exc

    def tag(self, tag: str):
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def untracked_files(self) -> Iterable[str]:
        """Return the third element of the ``self.status()`` triple."""
        _index_changes, _worktree_changes, not_tracked = self.status()
        return not_tracked

    def is_tracked(self, path: str) -> bool:
        """Tell whether ``path`` or anything below it is in the index.

        ``path`` is made relative to ``self.root_dir`` and compared, as
        POSIX-style bytes, with every index entry.
        """
        target = PathInfo(path).relative_to(self.root_dir).as_posix().encode()
        dir_prefix = target + b"/"
        return any(
            entry == target or entry.startswith(dir_prefix)
            for entry in self.repo.open_index()
        )

    def is_dirty(self, untracked_files: bool = False) -> bool:
        """Tell whether ``self.status()`` reports staged or unstaged changes.

        Untracked files only count when ``untracked_files`` is true.
        """
        staged, unstaged, untracked = self.status()
        if staged or unstaged:
            return True
        return bool(untracked_files and untracked)

    def active_branch(self) -> str:
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def list_branches(self) -> Iterable[str]:
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def list_tags(self) -> Iterable[str]:
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def list_all_commits(self) -> Iterable[str]:
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def get_tree_obj(self, rev: str, **kwargs) -> DulwichObject:
        """Wrap the tree parsed from ``rev`` in a ``DulwichObject`` rooted at ``"."``.

        Extra keyword arguments are accepted but not used.
        """
        from dulwich.objectspec import parse_tree

        root_tree = parse_tree(self.repo, rev)
        return DulwichObject(self.repo, ".", stat.S_IFDIR, root_tree.id)

    def get_rev(self) -> str:
        """Return what ``HEAD`` resolves to; raise ``SCMError`` when it resolves to nothing."""
        head = self.get_ref("HEAD")
        if not head:
            raise SCMError("Empty git repo")
        return head

    def resolve_rev(self, rev: str) -> str:
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def resolve_commit(self, rev: str) -> "GitCommit":
        """Not implemented by this backend; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def _get_stash(self, ref: str):
        """Return the dulwich ``Stash`` for ``ref``, creating and caching it on first use."""
        from dulwich.stash import Stash as DulwichStash

        try:
            return self._stashes[ref]
        except KeyError:
            stash = DulwichStash(self.repo, ref=os.fsencode(ref))
            self._stashes[ref] = stash
            return stash

    @cached_property
    def ignore_manager(self):
        """Build an ``IgnoreFilterManager`` from ``self.repo``."""
        from dulwich.ignore import IgnoreFilterManager

        manager = IgnoreFilterManager.from_repo(self.repo)
        return manager

    def is_ignored(self, path: str) -> bool:
        """Tell whether ``path`` (taken relative to the repo root) is ignored."""
        manager = self.ignore_manager
        # The manager's `is_ignored` returns `False` if excluded in
        # `.gitignore`, `None` if not mentioned at all and `True` if ignored,
        # so collapse the answer to a plain bool.
        verdict = manager.is_ignored(relpath(path, self.root_dir))
        return bool(verdict)

    def set_ref(
        self,
        name: str,
        new_ref: str,
        old_ref: Optional[str] = None,
        message: Optional[str] = None,
        symbolic: Optional[bool] = False,
    ):
        """Set ref ``name`` to ``new_ref``.

        With ``symbolic`` the ref is written with ``set_symbolic_ref`` and
        that call's result is returned.  Otherwise ``set_if_equals`` is
        used with ``old_ref`` as the expected current value, and
        ``SCMError`` is raised when it reports failure.
        """
        name_b = os.fsencode(name)
        new_ref_b = os.fsencode(new_ref)
        old_ref_b = os.fsencode(old_ref) if old_ref else None
        message_b = message.encode("utf-8") if message else None

        refs = self.repo.refs
        if symbolic:
            return refs.set_symbolic_ref(name_b, new_ref_b, message=message_b)

        updated = refs.set_if_equals(name_b, old_ref_b, new_ref_b,
                                     message=message_b)
        if not updated:
            raise SCMError(f"Failed to set '{name}'")

    def get_ref(self, name, follow: bool = True) -> Optional[str]:
        """Return the value of ref ``name`` as ``str``, or ``None`` if there is none.

        With ``follow`` the ref is looked up in ``self.repo.refs`` and a
        ``KeyError`` means "no value".  Without it the raw ref is read
        with ``read_ref`` and, when it parses as a symref value, the parsed
        value is returned instead.
        """
        from dulwich.refs import parse_symref_value

        raw_name = os.fsencode(name)
        if follow:
            try:
                value = self.repo.refs[raw_name]
            except KeyError:
                value = None
        else:
            value = self.repo.refs.read_ref(raw_name)
            if value:
                try:
                    value = parse_symref_value(value)
                except ValueError:
                    # Not a symref: keep the raw value
                    pass
        return os.fsdecode(value) if value else None

    def remove_ref(self, name: str, old_ref: Optional[str] = None):
        """Delete the ref ``name``.

        Args:
            name: Ref to remove.
            old_ref: If given, passed as the expected current value of
                the compare-and-remove operation.

        Raises:
            SCMError: If the refs container refuses the removal.
        """
        target = name.encode("utf-8")
        expected = None
        if old_ref:
            expected = old_ref.encode("utf-8")

        removed = self.repo.refs.remove_if_equals(target, expected)
        if removed:
            return
        raise SCMError(f"Failed to remove '{name}'")

    def iter_refs(self, base: Optional[str] = None):
        """Yield ref names as ``str``, optionally restricted to ``base``.

        Args:
            base: Optional ref prefix. It is passed (encoded) to the refs
                container's ``keys(base=...)``; each returned key is then
                joined back onto the prefix with a single ``/``.

        Yields:
            Ref names. With ``base`` set they are ``"<base>/<key>"``,
            otherwise the keys as returned by the container.
        """
        base_b = os.fsencode(base) if base else None
        # Normalise the prefix once, up front. Stripping inside the loop
        # removed one more trailing slash per iteration, so a base such as
        # "refs/tags//" produced differently-prefixed names for each key.
        prefix = base.rstrip("/") if base else None
        for key in self.repo.refs.keys(base=base_b):
            ref_name = os.fsdecode(key)
            if base:
                yield "/".join([prefix, ref_name])
            else:
                yield ref_name

    def iter_remote_refs(self, url: str, base: Optional[str] = None):
        """Yield the names of refs advertised by the remote ``url``.

        Args:
            url: Remote name or URL, resolved via dulwich's
                ``get_remote_repo``.
            base: Optional prefix; only refs starting with it are yielded.

        Raises:
            SCMError: If the remote cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.porcelain import get_remote_repo

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL") from exc

        for ref in client.get_refs(path):
            if not base or ref.startswith(os.fsencode(base)):
                yield os.fsdecode(ref)

    def get_refs_containing(self, rev: str, pattern: Optional[str] = None):
        """Placeholder for listing refs that contain ``rev``.

        This backend does not implement the operation; the call always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def push_refspec(
        self,
        url: str,
        src: Optional[str],
        dest: str,
        force: bool = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Push local ref(s) to the remote ``url``.

        The destination ref names and the values to send are computed by
        ``self._push_dest_refs(src, dest)``.

        Args:
            url: Remote name or URL, resolved via dulwich's
                ``get_remote_repo``.
            src: Local source ref, or ``None``; forwarded to
                ``_push_dest_refs``.
            dest: Destination ref (or prefix) on the remote.
            force: When true, refs that have diverged from the remote are
                overwritten without consulting ``on_diverged``.
            on_diverged: Optional callback invoked with the ref name and
                the remote SHA for each diverged ref; a falsy return value
                skips that ref.

        Raises:
            SCMError: If the remote is invalid or the push itself fails.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.errors import NotGitRepository, SendPackError
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        dest_refs, values = self._push_dest_refs(src, dest)

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL") from exc

        def update_refs(refs):
            # Called by dulwich with the remote's current refs; returns the
            # refs that should actually be updated.
            new_refs = {}
            for ref, value in zip(dest_refs, values):
                if ref in refs:
                    local_sha = self.repo.refs[ref]
                    remote_sha = refs[ref]
                    try:
                        check_diverged(self.repo, remote_sha, local_sha)
                    except DivergedBranches:
                        if not force:
                            overwrite = False
                            if on_diverged:
                                overwrite = on_diverged(
                                    os.fsdecode(ref),
                                    os.fsdecode(remote_sha),
                                )
                            if not overwrite:
                                continue
                new_refs[ref] = value
            return new_refs

        try:
            with Tqdm(desc="Pushing git refs",
                      bar_format=self.BAR_FMT_NOTOTAL) as pbar:

                def progress(msg_b):
                    msg = msg_b.decode("ascii").strip()
                    pbar.update_msg(msg)
                    pbar.refresh()
                    logger.trace(msg)

                client.send_pack(
                    path,
                    update_refs,
                    self.repo.object_store.generate_pack_data,
                    progress=progress,
                )
        except (NotGitRepository, SendPackError) as exc:
            # This must be an f-string: without the prefix the message
            # contained the literal text "{src}" and "{url}".
            raise SCMError(
                f"Git failed to push '{src}' to '{url}'") from exc

    def _push_dest_refs(self, src: Optional[str],
                        dest: str) -> Tuple[Iterable[bytes], Iterable[bytes]]:
        """Compute the destination ref names and values for a push.

        Returns a ``(dest_refs, values)`` pair of equal-length lists:

        * ``src`` ending in ``/``: one entry per sub-key of ``src`` in the
          local refs, each mapped to ``dest`` + sub-key.
        * ``src`` is ``None``: ``dest`` paired with ``ZERO_SHA``.
        * otherwise: ``dest`` paired with the local value of ``src``.
        """
        from dulwich.objects import ZERO_SHA

        if src is not None and src.endswith("/"):
            prefix = os.fsencode(src)
            subkeys = self.repo.refs.subkeys(prefix)
            shas = [self.repo.refs[prefix + key] for key in subkeys]
            targets = [os.fsencode(dest) + key for key in subkeys]
            return targets, shas

        shas = [ZERO_SHA] if src is None else [
            self.repo.refs[os.fsencode(src)]
        ]
        return [os.fsencode(dest)], shas

    def fetch_refspecs(
        self,
        url: str,
        refspecs: Iterable[str],
        force: Optional[bool] = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Fetch ``refspecs`` from the remote ``url`` and update local refs.

        Args:
            url: Remote name or URL, resolved via dulwich's
                ``get_remote_repo``.
            refspecs: Refspecs handed to dulwich's ``parse_reftuples``.
            force: When true, local refs that have diverged from the
                fetched value are overwritten without asking.
            on_diverged: Optional callback invoked with the local ref name
                and the fetched SHA for each diverged ref; a falsy return
                value leaves that ref untouched.

        Raises:
            SCMError: If the remote cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.objectspec import parse_reftuples
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        # Filled in by determine_wants() once the remote refs are known.
        selected = []

        def determine_wants(remote_refs):
            parsed = parse_reftuples(
                remote_refs,
                self.repo.refs,
                [os.fsencode(spec) for spec in refspecs],
                force=force,
            )
            selected.extend(parsed)
            # Only ask for objects that are not already in the local store.
            wanted = []
            for remote_name, _, _ in selected:
                if remote_refs[remote_name] not in self.repo.object_store:
                    wanted.append(remote_refs[remote_name])
            return wanted

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL") from exc

        with Tqdm(desc="Fetching git refs",
                  bar_format=self.BAR_FMT_NOTOTAL) as pbar:

            def progress(msg_b):
                text = msg_b.decode("ascii").strip()
                pbar.update_msg(text)
                pbar.refresh()
                logger.trace(text)

            fetch_result = client.fetch(
                path,
                self.repo,
                progress=progress,
                determine_wants=determine_wants,
            )

        for remote_name, local_name, _ in selected:
            try:
                if local_name in self.repo.refs:
                    check_diverged(self.repo, self.repo.refs[local_name],
                                   fetch_result.refs[remote_name])
            except DivergedBranches:
                # Without force, a diverged ref is only overwritten when
                # the callback explicitly approves it.
                if not force and not (on_diverged and on_diverged(
                        os.fsdecode(local_name),
                        os.fsdecode(fetch_result.refs[remote_name]))):
                    continue
            self.repo.refs[local_name] = fetch_result.refs[remote_name]

    def _stash_iter(self, ref: str):
        stash = self._get_stash(ref)
        yield from stash.stashes()

    def _stash_push(
        self,
        ref: str,
        message: Optional[str] = None,
        include_untracked: Optional[bool] = False,
    ) -> Tuple[Optional[str], bool]:
        """Push a new entry onto the stash stored under ``ref``.

        Returns:
            ``(rev, True)`` where ``rev`` is the decoded value returned by
            the dulwich stash push.

        Raises:
            NotImplementedError: For ``include_untracked`` or the default
                stash ref, which this backend does not handle.
            SCMError: If dulwich reports an invalid user identity.
        """
        from dulwich.repo import InvalidUserIdentity

        from dvc.scm.git import Stash

        # dulwich's stash push cannot include untracked files and leaves
        # the working tree untouched, so these cases are not supported.
        unsupported = include_untracked or ref == Stash.DEFAULT_STASH
        if unsupported:
            raise NotImplementedError

        stash = self._get_stash(ref)
        encoded_message = None
        if message:
            encoded_message = message.encode("utf-8")

        try:
            rev = stash.push(message=encoded_message)
        except InvalidUserIdentity as exc:
            raise SCMError(
                "Git username and email must be configured") from exc
        return os.fsdecode(rev), True

    def _stash_apply(self, rev: str):
        """Placeholder for applying the stash commit ``rev``.

        This backend does not implement the operation; the call always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def _stash_drop(self, ref: str, index: int):
        """Drop entry ``index`` from the stash stored under ``ref``.

        Raises:
            NotImplementedError: For the default stash ref.
            SCMError: If the stash rejects the drop with ``ValueError``.
        """
        from dvc.scm.git import Stash

        is_default = ref == Stash.DEFAULT_STASH
        if is_default:
            raise NotImplementedError

        target = self._get_stash(ref)
        try:
            target.drop(index)
        except ValueError as exc:
            raise SCMError("Failed to drop stash entry") from exc

    def describe(
        self,
        rev: str,
        base: Optional[str] = None,
        match: Optional[str] = None,
        exclude: Optional[str] = None,
    ) -> Optional[str]:
        """Return the first ref under ``base`` whose value equals ``rev``.

        Args:
            rev: Value to look for.
            base: Ref prefix to search; defaults to ``"refs/tags"``.
            match: Optional fnmatch pattern a ref name must satisfy.
            exclude: Optional fnmatch pattern that rules a ref name out.

        Returns:
            The matching ref name, or ``None`` if no ref matches.
        """
        search_base = base or "refs/tags"
        for candidate in self.iter_refs(base=search_base):
            if match and not fnmatch.fnmatch(candidate, match):
                continue
            if exclude and fnmatch.fnmatch(candidate, exclude):
                continue
            # Refs are compared without following symbolic refs.
            if self.get_ref(candidate, follow=False) == rev:
                return candidate
        return None

    def diff(self, rev_a: str, rev_b: str, binary=False) -> str:
        """Return the tree diff between two revisions as text.

        Both revisions are looked up in ``self.repo`` and the diff of
        their trees is rendered with dulwich's ``write_tree_diff`` and
        decoded as UTF-8. The ``binary`` argument is accepted but not
        used by this implementation.
        """
        from dulwich.patch import write_tree_diff

        old_commit = self.repo[os.fsencode(rev_a)]
        new_commit = self.repo[os.fsencode(rev_b)]

        out = BytesIO()
        write_tree_diff(out, self.repo.object_store, old_commit.tree,
                        new_commit.tree)
        raw = out.getvalue()
        return raw.decode("utf-8")

    def reset(self, hard: bool = False, paths: Iterable[str] = None):
        """Placeholder for resetting the index / working tree.

        This backend does not implement the operation; the call always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def checkout_index(
        self,
        paths: Optional[Iterable[str]] = None,
        force: bool = False,
        ours: bool = False,
        theirs: bool = False,
    ):
        """Placeholder for checking out paths from the index.

        This backend does not implement the operation; the call always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def status(
        self,
        ignored: bool = False
    ) -> Tuple[Mapping[str, Iterable[str]], Iterable[str], Iterable[str]]:
        """Return ``(staged, unstaged, untracked)`` with decoded path names.

        The data comes from dulwich's porcelain ``status`` run on
        ``self.root_dir``. ``staged`` maps each status key to its list of
        paths; keys whose path list is empty are left out.
        """
        from dulwich.porcelain import status as git_status

        staged, unstaged, untracked = git_status(self.root_dir,
                                                 ignored=ignored)

        staged_decoded = {}
        for kind, names in staged.items():
            if names:
                staged_decoded[kind] = [os.fsdecode(entry) for entry in names]

        unstaged_decoded = [os.fsdecode(entry) for entry in unstaged]
        untracked_decoded = [os.fsdecode(entry) for entry in untracked]
        return staged_decoded, unstaged_decoded, untracked_decoded

    def _reset(self) -> None:
        self.__dict__.pop("ignore_manager", None)

    def merge(
        self,
        rev: str,
        commit: bool = True,
        msg: Optional[str] = None,
        squash: bool = False,
    ) -> Optional[str]:
        """Placeholder for merging ``rev`` into the current branch.

        This backend does not implement the operation; the call always
        raises ``NotImplementedError``.
        """
        raise NotImplementedError
Exemple #18
0
class Wiki(HookMixin):
    """A wiki stored in a git repository at ``path``.

    The repository is opened on construction, or initialised (creating the
    directory) when ``path`` is not yet a git repository.
    """
    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at ``path``, creating it if necessary.

        :param path: Filesystem location of the wiki repository
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: {0}".format(self.path)

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')
        # Join the already-encoded parts as bytes. Passing bytes through
        # str.format() embeds their repr on Python 3, yielding identities
        # such as "b'Anon' <b'a@b.c'>".
        author = committer = name + b" <" + email + b">"
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: a ``WikiPage`` for ``name`` at ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts

        """
        index = self.repo.open_index()
        return [
            dict(name=filename_to_cname(name),
                 filename=name,
                 ctime=index[name].ctime[0],
                 mtime=index[name].mtime[0],
                 sha=index[name].sha,
                 size=index[name].size)
            for name in index
        ]
#!/usr/bin/env python3
import os, sys, socket, traceback, json, yaml, getpass
from dulwich.repo import Repo

# Demo script: open (or create) a dulwich repository under ~/.myrepo, stage
# a single file and commit it, printing some state along the way.
repo_path = os.path.realpath(os.path.expanduser('~/.myrepo'))
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(repo_path, exist_ok=True)
if not os.path.exists('{}/.git'.format(repo_path)):
    repo = Repo.init(repo_path)
else:
    repo = Repo(repo_path)

yaml.dump(repo, sys.stdout)
index = repo.open_index()
MSG = f'  repo index path={index.path}, index list={list(index)}, '
yaml.dump(MSG, sys.stdout)

# The context manager guarantees the file is closed (and flushed) before it
# is staged, even if the write fails.
with open(f'{repo_path}/foo', 'wb') as f:
    f.write(b"monty1")
repo.stage([b"foo"])

print(",".join(
    [f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))

commit_id = repo.do_commit(
    b"The first commit")  #, committer=getpass.getuser().encode())
print(f'    commit_id={commit_id},     repo_head = {repo.head()}   ')

#repo = Repo("myrepo")
Exemple #20
0
    def find_git_mtimes(self, context, silent_build):
        """
        Use git to find the mtimes of the files we care about

        Returns a dict mapping each selected file's path (relative to
        ``context.parent_dir``) to the newest author time found for it, or
        an empty dict when ``context.use_git_timestamps`` is falsy.
        """
        if not context.use_git_timestamps:
            return {}

        parent_dir = context.parent_dir
        root_folder = context.git_root

        # Can't use git timestamps if it's just a shallow clone
        # Otherwise all the files get the timestamp of the latest commit
        shallow_marker = os.path.join(root_folder, ".git", "shallow")
        if context.use_git_timestamps and os.path.exists(shallow_marker):
            raise HarpoonError("Can't get git timestamps from a shallow clone",
                               directory=parent_dir)

        git = Repo(root_folder)
        mtimes = {}
        all_files = set(git.open_index())

        use_files = set()
        for filename in all_files:
            rel = os.path.relpath(os.path.join(root_folder, filename),
                                  context.parent_dir)

            # Only include files under the parent_dir
            if rel.startswith("../"):
                continue

            # When use_git_timestamps is a collection of patterns rather
            # than a plain bool, the file has to match one of them
            if context.use_git_timestamps and type(
                    context.use_git_timestamps) is not bool:
                if not any(
                        fnmatch.fnmatch(rel, pattern)
                        for pattern in context.use_git_timestamps):
                    continue

            # Matched is true by default if
            # * Have context.exclude
            # * No context.exclude and no context.include
            matched = context.exclude or not any(
                [context.exclude, context.include])

            # Anything matching an exclude pattern is dropped ...
            if context.exclude and any(
                    fnmatch.fnmatch(rel, pattern)
                    for pattern in context.exclude):
                matched = False

            # ... unless it also matches an include pattern
            if context.include and any(
                    fnmatch.fnmatch(rel, pattern)
                    for pattern in context.include):
                matched = True

            if matched:
                use_files.add(filename)

        if not silent_build:
            log.info(
                "Finding modified times for %s/%s git controlled files in %s",
                len(use_files), len(all_files), root_folder)

        for entry in git.get_walker(paths=use_files):
            date = entry.commit.author_time
            for changes in entry.changes():
                # A walker entry may hand back a single change or a list
                if type(changes) is not list:
                    changes = [changes]
                for change in changes:
                    path = change.new.path
                    if not (root_folder and path and context.parent_dir):
                        continue
                    new_rel = os.path.relpath(
                        os.path.join(root_folder, path), context.parent_dir)
                    if path not in use_files:
                        continue
                    if not mtimes.get(new_rel, 0) < date:
                        continue
                    if new_rel.startswith("../"):
                        continue
                    mtimes[new_rel] = date

            # Stop walking once nothing in use_files is missing from mtimes
            if not (use_files - set(mtimes)):
                break

        return mtimes
Exemple #21
0
class Repo(object):
    """
    Wrapper around a libgit Repository that knows:

    * How to get all the files in the repository
    * How to get the oid of HEAD
    * How to get the commit times of the files we want commit times for

    It's written with speed in mind, given the constraints of making
    performant code in python!
    """
    def __init__(self, root_folder):
        self.git = Repository(root_folder)

    def all_files(self):
        """Return a set of all the files under git control"""
        return {entry.decode() for entry, _ in self.git.open_index().items()}

    @property
    def first_commit(self):
        """Return the oid of HEAD"""
        return self.git.head().decode()

    def file_commit_times(self, use_files_paths, debug=False):
        """
        Traverse the commits in the repository, starting from HEAD until we have
        found the commit times for all the files we care about.

        Yield ``(commit sha, commit time, [paths])`` once per commit in which
        at least one still-wanted file changed; each file is yielded once.

        The ``debug`` argument is accepted for compatibility but is not
        used by this implementation.
        """
        from collections import deque

        prefixes = PrefixTree()
        prefixes.fill(use_files_paths)

        for entry in self.git.get_walker():
            # Commit time taking into account the timezone
            commit_time = entry.commit.commit_time - entry.commit.commit_timezone

            # Get us the two different tree structures between parents and current
            cf_and_pf, changes = self.tree_structures_for(
                (),
                entry.commit.tree,
                [self.git.get_object(oid).tree for oid in entry.commit.parents],
                prefixes,
            )

            # Deep dive into any differences
            difference = []
            if changes:
                # FIFO worklist of trees still to compare; a deque keeps
                # popping from the front O(1) (list.pop(0) is O(n)).
                pending = deque([(cf_and_pf, changes)])
                while pending:
                    nxt, level_changes = pending.popleft()
                    for thing, sub_changes, is_path in self.differences_between(
                            nxt[0], nxt[1], level_changes, prefixes):
                        if is_path:
                            found = prefixes.remove(thing[:-1], thing[-1])
                            if found:
                                difference.append('/'.join(thing))
                        else:
                            pending.append((thing, sub_changes))

            # Only yield if there was a difference
            if difference:
                yield entry.commit.sha().hexdigest(), commit_time, difference

            # If nothing remains, then break!
            if not prefixes:
                break

    def entries_in_tree_oid(self, prefix, tree_oid):
        """Find the tree at this oid and return entries prefixed with ``prefix``"""
        try:
            tree = self.git.get_object(tree_oid)
        except KeyError:
            log.warning("Couldn't find object {0}".format(tree_oid))
            return empty
        else:
            return frozenset(self.entries_in_tree(prefix, tree))

    def entries_in_tree(self, prefix, tree):
        """
        Traverse the entries in this tree and yield (prefix, is_tree, oid)

        Where prefix is a tuple of the given prefix and the name of the entry.
        """
        for entry in tree.items():
            if prefix:
                new_prefix = prefix + (entry.path.decode(), )
            else:
                new_prefix = (entry.path.decode(), )

            yield (new_prefix, stat.S_ISDIR(entry.mode), entry.sha)

    def tree_structures_for(self, prefix, current_oid, parent_oids, prefixes):
        """
        Return the entries for this commit, the entries of the parent commits,
        and the difference between the two (current_files - parent_files)
        """
        if prefix and prefixes and prefix not in prefixes:
            return empty, empty

        parent_files = set()
        for oid in parent_oids:
            parent_files.update(self.entries_in_tree_oid(prefix, oid))

        current_files = self.entries_in_tree_oid(prefix, current_oid)
        return (current_files, parent_files), (current_files - parent_files)

    def differences_between(self, current_files, parent_files, changes, prefixes):
        """
        yield (thing, changes, is_path)

        If is_path is true, changes is None and thing is the path as a tuple.

        If is_path is false, thing is the current_files and parent_files for
        that changed treeentry and changes is the difference between current_files
        and parent_files.

        The code here is written to squeeze as much performance as possible out
        of this operation.
        """
        parent_oid = None

        # The parent lookups are only needed when a changed entry is a tree;
        # parent_values is therefore only defined (and only read) then.
        if any(is_tree for _, is_tree, _ in changes):
            if len(changes) == 1:
                wanted_path = list(changes)[0][0]
                parent_oid = frozenset(
                    oid for path, is_tree, oid in parent_files
                    if path == wanted_path and is_tree)
            else:
                parent_values = defaultdict(set)
                parent_changes = parent_files - current_files
                for path, is_tree, oid in parent_changes:
                    if is_tree:
                        parent_values[path].add(oid)

        for path, is_tree, oid in changes:
            if is_tree and path not in prefixes:
                continue

            if not is_tree:
                yield path, None, True
            else:
                parent_oids = parent_oid if parent_oid is not None else parent_values.get(path, empty)
                cf_and_pf, sub_changes = self.tree_structures_for(path, oid, parent_oids, prefixes)
                if sub_changes:
                    yield cf_and_pf, sub_changes, False
Exemple #22
0
class Unleash(object):
    """Drives the release workflow: builds release/dev commits from a base
    ref, runs the plugin steps around them and publishes tagged releases.
    """

    def _create_child_commit(self, parent_ref):
        """Return a new, unsaved commit whose only parent is ``parent_ref``.

        Author/committer come from ``opts['author']`` or, if that is unset,
        from the ``user.name`` / ``user.email`` git configuration. Both
        timestamps are set to the current time in the local timezone.

        Raises InvocationError if ``parent_ref`` is ambiguous or unknown.
        """
        parent = ResolvedRef(self.repo, parent_ref)

        if not parent.is_definite:
            raise InvocationError('{} is ambiguous: {}'.format(
                parent.ref, parent.full_name
            ))

        if not parent.found:
            raise InvocationError('Could not resolve "{}"'.format(parent.ref))

        # prepare the release commit
        commit = MalleableCommit.from_existing(
            self.repo, parent.id
        )

        # update author and such
        if opts['author'] is None:
            commit.author = '{} <{}>'.format(
                self.gitconfig.get('user', 'name'),
                self.gitconfig.get('user', 'email'),
            )
            # Was misspelled "commiter", which set a stray attribute and
            # left the real committer unchanged.
            commit.committer = commit.author
        else:
            commit.author = opts['author']
            commit.committer = opts['author']

        now = int(time.time())
        ltz = get_local_timezone(now)

        commit.author_time = now
        commit.author_timezone = ltz

        commit.commit_time = now
        commit.commit_timezone = ltz

        commit.parent_ids = [parent.id]

        return commit

    def __init__(self, plugins=None):
        """Store the plugin collection to notify during each step.

        ``plugins`` defaults to a fresh empty list per instance (a shared
        mutable default would leak state between instances).
        """
        self.plugins = [] if plugins is None else plugins

    def _init_repo(self):
        """Open the repository at ``opts['root']`` and load its git config."""
        self.repo = Repo(opts['root'])
        self.gitconfig = self.repo.get_config_stack()

    def _perform_step(self, signal_name):
        """Notify all plugins of ``signal_name`` inside a fresh local
        context and log how long the step took.
        """
        log.debug('begin: {}'.format(signal_name))

        begin = time.time()

        # create new top-level context
        with new_local_stack() as nc:
            nc['issues'] = issues.channel(signal_name)
            self.plugins.notify(signal_name)

        duration = time.time() - begin

        log.debug('end: {}, took {:.4f}s'.format(signal_name, duration))

    def create_release(self, ref):
        """Create a release commit and a follow-up dev commit from ``ref``.

        Runs the collect/prepare/lint plugin steps, optionally drops the
        user into an inspection shell, then (unless ``opts['dry_run']``)
        tags the release and moves the originating branch to the dev
        commit. Returns early, without raising, on ``PluginError``.
        """
        with new_local_stack() as nc:
            # resolve reference
            base_ref = ResolvedRef(self.repo, ref)
            log.debug(
                'Base ref: {} ({})'.format(base_ref.full_name, base_ref.id)
            )
            orig_tree = base_ref.get_object().tree

            # initialize context
            nc['commit'] = self._create_child_commit(ref)
            nc['issues'] = IssueCollector(log=log)
            nc['info'] = {'ref': base_ref}
            nc['log'] = log

            try:
                self._perform_step('collect_info')
                log.debug('info: {}'.format(pformat(info)))

                self._perform_step('prepare_release')
                self._perform_step('lint_release')

                if opts['inspect']:
                    log.info(unicode(commit))

                    # check out to temporary directory
                    with TempDir() as inspect_dir:
                        commit.export_to(inspect_dir)

                        log.info(
                            'You are being dropped into an interactive shell '
                            'inside a temporary checkout of the release '
                            'commit. No changes you make will persist. Exit '
                            'the shell to abort the release process.\n\n'
                            'Use "exit 2" to continue the release.'
                        )

                        # The shell must run while the temporary checkout
                        # still exists, i.e. inside the TempDir context.
                        status = run_user_shell(cwd=inspect_dir)

                    if status != 2:
                        raise InvocationError(
                            'Aborting release, got exit code {} from shell.'.
                            format(status))

                # save release commit
                release_commit = nc['commit']

                # we're done with the release, now create the dev commit
                nc['commit'] = self._create_child_commit(ref)
                nc['issues'] = IssueCollector(log=log)

                # creating development commit
                self._perform_step('prepare_dev')

                if opts['dry_run']:
                    log.info('Not saving created commits. Dry-run successful.')
                    return

                # we've got both commits, now tag the release
                confirm_prompt(
                    'Advance dev to {} and release {}?'
                    .format(info['dev_version'], info['release_version'])
                )

                release_tag = 'refs/tags/{}'.format(info['release_version'])

                if release_tag in self.repo.refs:
                    confirm_prompt(
                        'Repository already contains {}, really overwrite tag?'
                        .format(release_tag),
                    )

                release_hash = release_commit.save()

                log.info('{}: {}'.format(release_tag, release_hash))
                self.repo.refs[release_tag] = release_hash

                # save the dev commit
                dev_hash = nc['commit'].save()

                # if our release commit formed from a branch, we set that branch
                # to our new dev commit
                assert base_ref.is_definite and base_ref.found
                if not base_ref.is_ref or\
                        not base_ref.full_name.startswith('refs/heads'):
                    log.warning('Release commit does not originate from a '
                                'branch; dev commit will not be reachable.')
                    log.info('Dev commit: {}'.format(dev_hash))
                else:
                    self.repo.refs[base_ref.full_name] = dev_hash

                    # change the branch to point at our new dev commit
                    log.info('{}: {}'.format(
                        base_ref.full_name, dev_hash
                    ))

                    self._update_working_copy(base_ref, orig_tree)
            except PluginError:
                # just abort, error has been logged already
                log.debug('Exiting due to PluginError')
                return

    def _update_working_copy(self, base_ref, orig_tree):
        """Reset the index and check out the dev commit, when safe.

        Nothing is touched unless HEAD is a symbolic ref to ``base_ref``,
        the repository has an index, the index has no changes relative to
        ``orig_tree``, and the user confirms the prompt.
        """
        head_ref = ResolvedRef(self.repo, 'HEAD')
        if not head_ref.is_definite or not head_ref.is_symbolic\
                or not head_ref.target == base_ref.full_name:
            # The placeholder was previously never filled in.
            log.info('HEAD is not a symbolic ref to {}, leaving your '
                     'working copy untouched.'.format(base_ref.full_name))
            return

        if not self.repo.has_index():
            log.info('Repository has no index, not updating working copy.')
            return

        index = self.repo.open_index()

        changes = list(index.changes_from_tree(
            self.repo.object_store,
            orig_tree,
        ))

        if changes:
            log.warning('There are staged changes in your index. Will not '
                        'update working copy.\n\n'
                        'You will need to manually change your HEAD to '
                        '{}.'.format(base_ref.id))
            return

        # reset the index to the new dev commit
        confirm_prompt(
            'Do you want to reset your index to the new dev commit and check '
            'it out? Unsaved changes to your working copy may be overwritten!'
        )
        log.info('Resetting index and checking out dev commit.')
        build_index_from_tree(
            self.repo.path,
            self.repo.index_path(),
            self.repo.object_store,
            base_ref.get_object().tree,
        )

    def publish(self, ref):
        """Run the publish steps for ``ref``.

        When ``ref`` is ``None`` the tag under ``refs/tags`` with the most
        recent commit time is used; if there is none, an error is logged
        and nothing is published. Returns early on ``PluginError``.
        """
        if ref is None:
            # A plain one-argument lambda replaces the tuple-parameter
            # form, which is a syntax error on Python 3.
            tags = sorted(
                (t for t in self.repo.refs.as_dict().items() if
                 t[0].startswith('refs/tags')),
                key=lambda item: self.repo[item[1]].commit_time,
                reverse=True,
            )

            if not tags:
                log.error('Could not find a tag to publish.')
                return

            ref = tags[0][0]

        pref = ResolvedRef(self.repo, ref)

        with new_local_stack() as nc:
            nc['commit'] = MalleableCommit.from_existing(self.repo, pref.id)
            log.debug('Release tag: {}'.format(commit))

            nc['issues'] = IssueCollector(log=log)
            nc['info'] = {'ref': pref}
            nc['log'] = log

            try:
                self._perform_step('collect_info')
                log.debug('info: {}'.format(pformat(info)))
                self._perform_step('publish_release')
            except PluginError:
                log.debug('Exiting due to PluginError')
                return
Exemple #23
0
class Efesto:
    """WSGI application serving pages out of a git repository's index.

    A request for ``/some/page`` is answered with ``some/page.html`` or
    ``some/page.rst`` from the index, wrapped in a header and footer
    template.  Templates are looked up in the page's directory first and
    then in each parent directory up to the repository root.

    Request state (``env``, ``page``, ``git_index``, ``start_response``) is
    stored on the instance by ``__call__``.
    """

    def __init__(self,
                 path='.',
                 prefix='',
                 suffix='',
                 header='header.html',
                 footer='footer.html',
                 notfound='notfound.html'):
        """Open the repository at ``path`` and remember the template names.

        :param path: location of the git repository.
        :param prefix: chunk emitted before a rendered ``.rst`` body.
        :param suffix: chunk emitted after a rendered ``.rst`` body.
        :param header: file name of the header template.
        :param footer: file name of the footer template.
        :param notfound: file name of the "not found" template.
        """
        self.repo = Repo(path)
        self.prefix = prefix
        self.suffix = suffix
        self.header = header
        self.footer = footer
        self.notfound = notfound
        # Maps a file extension to the method that renders it.
        self.allowed_ext = {'html': self.render_html, 'rst': self.render_rst}

    def __call__(self, environ, start_response):
        """WSGI entry point: resolve ``PATH_INFO`` to a page and render it."""
        self.start_response = start_response
        self.env = environ
        self.git_index = self.repo.open_index()
        # Drop the leading "/" and any trailing "/"; the root maps to "index".
        self.page = environ['PATH_INFO'][1:].rstrip('/') or 'index'
        return self.render_page()

    def render_page(self):
        """Render the first ``<page>.<ext>`` present in the index, else 404."""
        for ext, renderer in self.allowed_ext.items():
            candidate = "%s.%s" % (self.page, ext)
            if candidate in self.git_index:
                return renderer(candidate)
        return self.render_notfound()

    def render_html(self, item):
        """Return ``[header, body, footer]`` for the HTML entry ``item``."""
        self.start_response('200 OK',
                            [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        # Element 8 of the index entry is used as the object id.
        sha = self.git_index[item][8]
        body = self.apply_vars(self.repo[sha].as_raw_string())
        return [header, body, footer]

    def render_rst(self, item):
        """Return the response chunks for the reStructuredText entry ``item``.

        The source is converted with ``publish_parts`` and the resulting
        ``html_body`` is wrapped in ``prefix``/``suffix`` and the templates.
        """
        self.start_response('200 OK',
                            [('Content-Type', 'text/html; charset=utf-8')])
        sha = self.git_index[item][8]
        blob = self.repo[sha]
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = unicode(
            publish_parts(self.apply_vars(blob.as_raw_string()),
                          writer_name='html')['html_body']).encode('utf8')
        return [header, self.prefix, body, self.suffix, footer]

    def render_notfound(self):
        """Return a 404 response, using the ``notfound`` template if found."""
        self.start_response('404 Not Found',
                            [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = self.get_html_template(self.notfound)
        if body == '':
            body = '<h1>Not Found</h1>'
        return [header, body, footer]

    def apply_vars(self, body):
        """Replace every ``|KEY|`` in ``body`` with ``str(environ[KEY])``."""
        for key, value in self.env.items():
            body = body.replace("|%s|" % key, str(value))
        return body

    def get_html_template(self, html):
        """Return the nearest template named ``html``, or ``''`` if none.

        Starting from the directory of the current page, each directory up
        to the repository root is checked for ``<directory>/<html>``.
        """
        path = self.page
        while True:
            current_path = path
            path = '/'.join(path.split('/')[:-1])
            if current_path == path:
                break
            # Only strip the separator that appears when ``path`` is empty.
            # Slicing off the first character unconditionally would corrupt
            # the directory name for nested pages.
            item = ("%s/%s" % (path, html)).lstrip('/')
            if item in self.git_index:
                sha = self.git_index[item][8]
                return self.apply_vars(self.repo[sha].as_raw_string())
        return ''
Exemple #24
0
#!/usr/bin/env python2

import os.path
import urlparse
from email.utils import formatdate
from dulwich.repo import Repo
from dulwich.objects import Blob, Tree, Commit
from docutils import io, nodes
from docutils.core import publish_doctree, publish_from_doctree
from render import MyWriter

# Open the repository in the current directory and resolve HEAD to its commit.
repo = Repo(".")
store = repo.object_store
commit_sha = repo.head()
commit = repo.get_object(commit_sha)
index = repo.open_index()

# Refuse to continue while the index differs from the tree of the HEAD commit.
assert not list(index.changes_from_tree(store, commit.tree)), "uncommited changes"


def render_rst(blob, path):
    doc = publish_doctree(blob.as_raw_string())
    for node in doc.traverse(nodes.reference):
        uri = urlparse.urlparse(node['refuri'])
        if not uri.netloc and os.path.basename(uri.path) == "README.rst":
            node['refuri'] = urlparse.urlunparse(
                (uri.scheme, uri.netloc, uri.path[:-10] or "./", uri.params, uri.query, uri.fragment))

    output = publish_from_doctree(
        doc,
        destination_path=path,
                else:
                    file_name = item[www+2:dotsomething]
                                        
                #print file_name
                
                outfile = open('./' + file_name + '.csv', "ab")
                writer = csv.writer(outfile)
                writer.writerow([clean_url])
                #final_list.append(clean_url)

    #print final_list                                                                                    #remove '#' at beginning of line to print all pdf urls
    #auto commit
    repo = Repo("unscraper")
    repo
    
    index = repo.open_index()
    print(index.path.decode(sys.getfilesystemencoding()))
    
    list(index)

    f = open('unscraper/thisIsATest.md','wb')
    _ = f.write(b"monty")
    f.close()

    repo.stage([b"thisIsATest"])

    print(",".join([f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))

    commit_id = repo.do_commit(
        b"testing dulwich", committer=b"Aly <*****@*****.**>")
Exemple #26
0
class DulwichBackend(BaseGitBackend):  # pylint:disable=abstract-method
    """Dulwich Git backend.

    Wraps a ``dulwich.repo.Repo``.  Operations this backend does not provide
    raise ``NotImplementedError``.  Ref names and revisions are accepted and
    returned as ``str`` and converted to ``bytes`` for dulwich.
    """

    def __init__(  # pylint:disable=W0231
        self, root_dir=os.curdir, search_parent_directories=True
    ):
        """Open the repository at (or, optionally, above) ``root_dir``.

        Raises:
            SCMError: if dulwich reports that no git repository was found.
        """
        from dulwich.errors import NotGitRepository
        from dulwich.repo import Repo

        try:
            if search_parent_directories:
                self.repo = Repo.discover(start=root_dir)
            else:
                self.repo = Repo(root_dir)
        except NotGitRepository as exc:
            raise SCMError(f"{root_dir} is not a git repository") from exc

        # Cache of dulwich Stash objects keyed by ref name (see _get_stash).
        self._stashes: dict = {}

    def close(self):
        """Close the underlying dulwich repository."""
        self.repo.close()

    @property
    def root_dir(self) -> str:
        """Path of the underlying repository (``Repo.path``)."""
        return self.repo.path

    @staticmethod
    def clone(
        url: str,
        to_path: str,
        rev: Optional[str] = None,
        shallow_branch: Optional[str] = None,
    ):
        raise NotImplementedError

    @staticmethod
    def is_sha(rev: str) -> bool:
        raise NotImplementedError

    @property
    def dir(self) -> str:
        """Value of the repository's ``commondir()``."""
        return self.repo.commondir()

    def add(self, paths: Iterable[str]):
        """Stage ``paths``; directories are expanded with ``walk_files``.

        A single string is treated as one path.  Relative paths are resolved
        against ``root_dir``.
        """
        from dvc.utils.fs import walk_files

        if isinstance(paths, str):
            paths = [paths]

        files = []
        for path in paths:
            if not os.path.isabs(path):
                path = os.path.join(self.root_dir, path)
            if os.path.isdir(path):
                files.extend(walk_files(path))
            else:
                files.append(path)

        for fpath in files:
            # NOTE: this doesn't check gitignore, same as GitPythonBackend.add
            self.repo.stage(relpath(fpath, self.root_dir))

    def commit(self, msg: str):
        """Create a commit with message ``msg`` via ``dulwich.porcelain``."""
        from dulwich.porcelain import commit

        commit(self.root_dir, message=msg)

    def checkout(
        self, branch: str, create_new: Optional[bool] = False, **kwargs,
    ):
        raise NotImplementedError

    def pull(self, **kwargs):
        raise NotImplementedError

    def push(self):
        raise NotImplementedError

    def branch(self, branch: str):
        raise NotImplementedError

    def tag(self, tag: str):
        raise NotImplementedError

    def untracked_files(self) -> Iterable[str]:
        raise NotImplementedError

    def is_tracked(self, path: str) -> bool:
        """Return True if ``path``, or any entry beneath it, is in the index."""
        from dvc.path_info import PathInfo

        rel = PathInfo(path).relative_to(self.root_dir).as_posix().encode()
        rel_dir = rel + b"/"
        # A distinct loop name avoids shadowing the ``path`` parameter.
        return any(
            entry == rel or entry.startswith(rel_dir)
            for entry in self.repo.open_index()
        )

    def is_dirty(self, **kwargs) -> bool:
        raise NotImplementedError

    def active_branch(self) -> str:
        raise NotImplementedError

    def list_branches(self) -> Iterable[str]:
        raise NotImplementedError

    def list_tags(self) -> Iterable[str]:
        raise NotImplementedError

    def list_all_commits(self) -> Iterable[str]:
        raise NotImplementedError

    def get_tree(self, rev: str, **kwargs) -> BaseTree:
        raise NotImplementedError

    def get_rev(self) -> str:
        raise NotImplementedError

    def resolve_rev(self, rev: str) -> str:
        raise NotImplementedError

    def resolve_commit(self, rev: str) -> str:
        raise NotImplementedError

    def branch_revs(self, branch: str, end_rev: Optional[str] = None):
        raise NotImplementedError

    def _get_stash(self, ref: str):
        """Return the cached dulwich ``Stash`` for ``ref``, creating it once."""
        from dulwich.stash import Stash as DulwichStash

        if ref not in self._stashes:
            self._stashes[ref] = DulwichStash(self.repo, ref=os.fsencode(ref))
        return self._stashes[ref]

    def is_ignored(self, path):
        """Return the ignore manager's verdict for ``path``."""
        from dulwich import ignore

        manager = ignore.IgnoreFilterManager.from_repo(self.repo)
        return manager.is_ignored(relpath(path, self.root_dir))

    def set_ref(
        self,
        name: str,
        new_ref: str,
        old_ref: Optional[str] = None,
        message: Optional[str] = None,
        symbolic: Optional[bool] = False,
    ):
        """Point ref ``name`` at ``new_ref``.

        With ``symbolic`` set, ``name`` becomes a symbolic ref to ``new_ref``.
        Otherwise the update is made through ``set_if_equals`` with
        ``old_ref`` as the expected current value.

        Raises:
            SCMError: if ``set_if_equals`` reports failure.
        """
        name_b = os.fsencode(name)
        new_ref_b = os.fsencode(new_ref)
        old_ref_b = os.fsencode(old_ref) if old_ref else None
        message_b = message.encode("utf-8") if message else None
        if symbolic:
            # Pass the encoded message, as the non-symbolic branch does.
            return self.repo.refs.set_symbolic_ref(
                name_b, new_ref_b, message=message_b
            )
        if not self.repo.refs.set_if_equals(
            name_b, old_ref_b, new_ref_b, message=message_b
        ):
            raise SCMError(f"Failed to set '{name}'")

    def get_ref(self, name, follow: Optional[bool] = True) -> Optional[str]:
        """Return the value of ref ``name``, or ``None`` if it has none.

        With ``follow`` the ref is looked up through ``repo.refs[...]``.
        Without it the raw value is read and, when it parses as a symbolic
        ref, the name it points at is returned instead.
        """
        from dulwich.refs import parse_symref_value

        name_b = os.fsencode(name)
        if follow:
            try:
                ref = self.repo.refs[name_b]
            except KeyError:
                ref = None
        else:
            ref = self.repo.refs.read_ref(name_b)
            try:
                if ref:
                    ref = parse_symref_value(ref)
            except ValueError:
                # Not a symbolic ref: keep the raw value.
                pass
        if ref:
            return os.fsdecode(ref)
        return None

    def remove_ref(self, name: str, old_ref: Optional[str] = None):
        """Remove ref ``name`` through ``remove_if_equals``.

        Raises:
            SCMError: if ``remove_if_equals`` reports failure.
        """
        name_b = name.encode("utf-8")
        old_ref_b = old_ref.encode("utf-8") if old_ref else None
        if not self.repo.refs.remove_if_equals(name_b, old_ref_b):
            raise SCMError(f"Failed to remove '{name}'")

    def iter_refs(self, base: Optional[str] = None):
        """Yield ref names, optionally limited to those under ``base``.

        Keys returned for a ``base`` are joined back onto it so that the
        yielded names include the ``base`` prefix.
        """
        # Encode before trimming: the lookup uses ``base`` as given.
        base_b = os.fsencode(base) if base else None
        if base and base.endswith("/"):
            base = base[:-1]
        for key in self.repo.refs.keys(base=base_b):
            if base:
                yield "/".join([base, os.fsdecode(key)])
            else:
                yield os.fsdecode(key)

    def iter_remote_refs(self, url: str, base: Optional[str] = None):
        """Yield the refs advertised by remote ``url``.

        When ``base`` is given only refs starting with it are yielded.

        Raises:
            SCMError: if ``url`` cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.porcelain import get_remote_repo

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        if base:
            yield from (
                os.fsdecode(ref)
                for ref in client.get_refs(path)
                if ref.startswith(os.fsencode(base))
            )
        else:
            yield from (os.fsdecode(ref) for ref in client.get_refs(path))

    def get_refs_containing(self, rev: str, pattern: Optional[str] = None):
        raise NotImplementedError

    def push_refspec(
        self,
        url: str,
        src: Optional[str],
        dest: str,
        force: bool = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Push local ref(s) ``src`` to ``dest`` on remote ``url``.

        A remote ref that has diverged from the local one is skipped unless
        ``force`` is set or ``on_diverged(ref, remote_sha)`` returns a true
        value.

        Raises:
            SCMError: if ``url`` cannot be resolved, or if sending the pack
                fails with ``NotGitRepository`` or ``SendPackError``.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.errors import NotGitRepository, SendPackError
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        dest_refs, values = self._push_dest_refs(src, dest)

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        def update_refs(refs):
            # Callback for send_pack: map each destination ref to its new
            # value, leaving out refs the caller chose not to overwrite.
            new_refs = {}
            for ref, value in zip(dest_refs, values):
                if ref in refs:
                    local_sha = self.repo.refs[ref]
                    remote_sha = refs[ref]
                    try:
                        check_diverged(self.repo, remote_sha, local_sha)
                    except DivergedBranches:
                        if not force:
                            overwrite = False
                            if on_diverged:
                                overwrite = on_diverged(
                                    os.fsdecode(ref), os.fsdecode(remote_sha),
                                )
                            if not overwrite:
                                continue
                new_refs[ref] = value
            return new_refs

        def progress(msg):
            logger.trace("git send_pack: %s", msg)

        try:
            client.send_pack(
                path,
                update_refs,
                self.repo.object_store.generate_pack_data,
                progress=progress,
            )
        except (NotGitRepository, SendPackError) as exc:
            # f-string so the message names the actual ref and remote.
            raise SCMError(f"Git failed to push '{src}' to '{url}'") from exc

    def _push_dest_refs(
        self, src: Optional[str], dest: str
    ) -> Tuple[Iterable[bytes], Iterable[bytes]]:
        """Return ``(dest_refs, values)`` for a push of ``src`` to ``dest``.

        A ``src`` ending in "/" expands to every ref beneath it, each mapped
        to the same sub-key under ``dest``.  A ``src`` of ``None`` maps
        ``dest`` to ``ZERO_SHA``.
        """
        from dulwich.objects import ZERO_SHA

        if src is not None and src.endswith("/"):
            src_b = os.fsencode(src)
            keys = self.repo.refs.subkeys(src_b)
            values = [self.repo.refs[b"".join([src_b, key])] for key in keys]
            dest_refs = [b"".join([os.fsencode(dest), key]) for key in keys]
        else:
            if src is None:
                values = [ZERO_SHA]
            else:
                values = [self.repo.refs[os.fsencode(src)]]
            dest_refs = [os.fsencode(dest)]
        return dest_refs, values

    def fetch_refspecs(
        self,
        url: str,
        refspecs: Iterable[str],
        force: Optional[bool] = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Fetch ``refspecs`` from remote ``url`` and update the local refs.

        A local ref that has diverged from the fetched value is left alone
        unless ``force`` is set or ``on_diverged(ref, fetched_sha)`` returns
        a true value.

        Raises:
            SCMError: if ``url`` cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.objectspec import parse_reftuples
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        # Filled in by determine_wants and read again after the fetch.
        fetch_refs = []

        def determine_wants(remote_refs):
            fetch_refs.extend(
                parse_reftuples(
                    remote_refs,
                    self.repo.refs,
                    [os.fsencode(refspec) for refspec in refspecs],
                    force=force,
                )
            )
            # Only ask for objects that are not already in the local store.
            return [
                remote_refs[lh]
                for (lh, _, _) in fetch_refs
                if remote_refs[lh] not in self.repo.object_store
            ]

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        def progress(msg):
            logger.trace("git fetch: %s", msg)

        fetch_result = client.fetch(
            path, self.repo, progress=progress, determine_wants=determine_wants
        )
        for (lh, rh, _) in fetch_refs:
            try:
                if rh in self.repo.refs:
                    check_diverged(
                        self.repo, self.repo.refs[rh], fetch_result.refs[lh]
                    )
            except DivergedBranches:
                if not force:
                    overwrite = False
                    if on_diverged:
                        overwrite = on_diverged(
                            os.fsdecode(rh), os.fsdecode(fetch_result.refs[lh])
                        )
                    if not overwrite:
                        continue
            self.repo.refs[rh] = fetch_result.refs[lh]

    def _stash_iter(self, ref: str):
        """Yield the entries of the stash stored at ``ref``."""
        stash = self._get_stash(ref)
        yield from stash.stashes()

    def _stash_push(
        self,
        ref: str,
        message: Optional[str] = None,
        include_untracked: Optional[bool] = False,
    ) -> Tuple[Optional[str], bool]:
        """Push a stash entry onto ``ref`` and return ``(rev, True)``.

        Raises:
            NotImplementedError: for ``include_untracked`` or for the
                default stash ref.
        """
        from dvc.scm.git import Stash

        if include_untracked or ref == Stash.DEFAULT_STASH:
            # dulwich stash.push does not support include_untracked and does
            # not touch working tree
            raise NotImplementedError

        stash = self._get_stash(ref)
        message_b = message.encode("utf-8") if message else None
        rev = stash.push(message=message_b)
        return os.fsdecode(rev), True

    def _stash_apply(self, rev: str):
        raise NotImplementedError

    def reflog_delete(
        self, ref: str, updateref: bool = False, rewrite: bool = False
    ):
        raise NotImplementedError

    def describe(
        self,
        rev: str,
        base: Optional[str] = None,
        match: Optional[str] = None,
        exclude: Optional[str] = None,
    ) -> Optional[str]:
        """Return the first ref under ``base`` whose value equals ``rev``.

        ``base`` defaults to ``refs/tags``.  Refs not matching the ``match``
        glob, or matching the ``exclude`` glob, are skipped.  Returns
        ``None`` when nothing qualifies.
        """
        if not base:
            base = "refs/tags"
        for ref in self.iter_refs(base=base):
            if (match and not fnmatch.fnmatch(ref, match)) or (
                exclude and fnmatch.fnmatch(ref, exclude)
            ):
                continue
            if self.get_ref(ref, follow=False) == rev:
                return ref
        return None

    def diff(self, rev_a: str, rev_b: str, binary=False) -> str:
        """Return the tree diff between ``rev_a`` and ``rev_b`` as text.

        ``binary`` is accepted but not used here.
        """
        from dulwich.patch import write_tree_diff

        commit_a = self.repo[os.fsencode(rev_a)]
        commit_b = self.repo[os.fsencode(rev_b)]

        buf = BytesIO()
        write_tree_diff(
            buf, self.repo.object_store, commit_a.tree, commit_b.tree
        )
        return buf.getvalue().decode("utf-8")

    def reset(self, hard: bool = False, paths: Optional[Iterable[str]] = None):
        raise NotImplementedError

    def checkout_paths(self, paths: Iterable[str], force: bool = False):
        raise NotImplementedError
Exemple #27
0
class Wiki(HookMixin):
    """A wiki backed by a git repository.

    The repository at ``path`` is opened on construction, or initialised
    (creating the directory) when none exists there yet.
    """

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open, or create, the git repository at ``path``."""
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name; the class default is used when empty
        :param email: Committer email; the class default is used when empty
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns
        """
        if not name:
            name = self.default_committer_name
        if not email:
            email = self.default_committer_email
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')
        # %-interpolation on bytes: ``str.format`` ignores ``%s`` placeholders
        # and would leave the identity as the literal "%s <%s>".
        author = committer = b"%s <%s>" % (name, email)
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: a ``WikiPage`` for ``name`` at ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get the entries of the repository index.

        :return: list -- one dict per entry with the keys ``name``,
            ``filename``, ``ctime``, ``mtime``, ``sha`` and ``size``

        """
        index = self.repo.open_index()
        rv = []
        for name in index:
            entry = index[name]  # look the entry up once per file
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     ctime=entry.ctime[0],
                     mtime=entry.mtime[0],
                     sha=entry.sha,
                     size=entry.size))

        return rv
Exemple #28
0
    def find_git_mtimes(self, context, silent_build):
        """
        Map git controlled files to the author time of the newest commit
        that touched them.

        Returns an empty dictionary when ``context.use_git_timestamps`` is
        falsy. Otherwise returns ``{path relative to parent_dir: timestamp}``
        and stores the result with ``set_cached_mtimes`` when the newest
        commit differs from the cached one.

        Raises HarpoonError if the repository is a shallow clone.
        """
        timestamps_option = context.use_git_timestamps
        if not timestamps_option:
            return {}

        parent_dir = context.parent_dir
        root_folder = context.git_root

        # A shallow clone has no history to walk, so every file would end up
        # with the timestamp of the latest commit
        if os.path.exists(os.path.join(root_folder, ".git", "shallow")):
            raise HarpoonError("Can't get git timestamps from a shallow clone", directory=parent_dir)

        git = Repo(root_folder)
        all_files = set(git.open_index())

        # use_git_timestamps is either a boolean or a list of glob patterns
        has_patterns = type(timestamps_option) is not bool

        use_files = set()
        use_files_relpaths = set()
        for filename in all_files:
            relpath = os.path.relpath(os.path.join(root_folder, filename.decode('utf-8')), parent_dir)

            # Skip anything outside the parent_dir
            if relpath.startswith("../"):
                continue

            # Skip files not selected by the use_git_timestamps patterns
            if has_patterns and not any(fnmatch.fnmatch(relpath, pattern) for pattern in timestamps_option):
                continue

            if context.exclude:
                # With an exclude list, a file is kept unless a pattern hits it
                matched = not any(fnmatch.fnmatch(relpath, pattern) for pattern in context.exclude)
            else:
                # Without one, a file is kept only when there is no include list
                matched = not context.include

            # An include pattern always wins
            if context.include and any(fnmatch.fnmatch(relpath, pattern) for pattern in context.include):
                matched = True

            if matched:
                use_files.add(filename)
                use_files_relpaths.add(relpath)

        if not silent_build:
            log.info("Finding modified times for %s/%s git controlled files in %s", len(use_files), len(all_files), root_folder)

        mtimes = {}
        first_commit = None
        cached_commit, cached_mtimes = self.get_cached_mtimes(root_folder, use_files_relpaths)
        for entry in git.get_walker():
            commit_id = entry.commit.id.decode('utf-8')
            if first_commit is None:
                first_commit = commit_id

            # Reached the commit the cache was built from: the cache supplies
            # the rest, with anything found so far taking precedence
            if cached_commit and commit_id == cached_commit:
                cached_mtimes.update(mtimes)
                mtimes = cached_mtimes
                break

            date = entry.commit.author_time
            added = False
            for changes in entry.changes():
                if type(changes) is not list:
                    changes = [changes]
                for change in changes:
                    path = change.new.path
                    if not (root_folder and path and parent_dir):
                        continue
                    if path not in use_files:
                        continue

                    new_relpath = os.path.relpath(os.path.join(root_folder, path.decode('utf-8')), parent_dir)
                    if new_relpath.startswith("../"):
                        continue

                    key = new_relpath.encode('utf-8')
                    if mtimes.get(key, 0) < date:
                        mtimes[key] = date
                        added = True

            # Stop walking once nothing in use_files is missing from mtimes
            if added and not (use_files - set(mtimes)):
                break

        mtimes = {
            (fn.decode('utf-8') if hasattr(fn, "decode") else fn): mtime
            for fn, mtime in mtimes.items()
        }
        if first_commit != cached_commit:
            self.set_cached_mtimes(root_folder, first_commit, mtimes, use_files_relpaths)
        return mtimes
Exemple #29
0
class Efesto:
    """WSGI application that serves pages stored in a git index.

    ``/some/page`` is answered from ``some/page.html`` or ``some/page.rst``
    in the index, surrounded by a header and a footer template.  A template
    is searched for in the directory of the page and then in every parent
    directory up to the repository root.

    ``__call__`` stores the request state (``env``, ``page``, ``git_index``
    and ``start_response``) on the instance.
    """

    def __init__(self, path='.', prefix='', suffix='', header='header.html',
                 footer='footer.html', notfound='notfound.html'):
        """Open the repository at ``path`` and record the template names.

        ``prefix`` and ``suffix`` are emitted around rendered ``.rst``
        bodies; ``header``, ``footer`` and ``notfound`` are template file
        names.
        """
        self.repo = Repo(path)
        self.prefix = prefix
        self.suffix = suffix
        self.header = header
        self.footer = footer
        self.notfound = notfound
        # Extension -> rendering method
        self.allowed_ext = {'html': self.render_html, 'rst': self.render_rst}

    def __call__(self, environ, start_response):
        """WSGI entry point: map ``PATH_INFO`` to a page and render it."""
        self.start_response = start_response
        self.env = environ
        self.git_index = self.repo.open_index()
        requested_item = environ['PATH_INFO'][1:].rstrip('/')
        if not requested_item:
            # The root URL serves the "index" page.
            requested_item = 'index'
        self.page = requested_item
        return self.render_page()

    def render_page(self):
        """Render ``<page>.<ext>`` for the first known extension, else 404."""
        for ext in self.allowed_ext:
            item = "%s.%s" % (self.page, ext)
            if item in self.git_index:
                return self.allowed_ext[ext](item)
        return self.render_notfound()

    def render_html(self, item):
        """Return ``[header, body, footer]`` for the HTML entry ``item``."""
        self.start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        # Element 8 of the index entry is used as the object id.
        sha = self.git_index[item][8]
        body = self.apply_vars(self.repo[sha].as_raw_string())
        return [header, body, footer]

    def render_rst(self, item):
        """Return the response chunks for the reStructuredText entry ``item``.

        The ``html_body`` produced by ``publish_parts`` is wrapped in
        ``prefix``/``suffix`` and the header/footer templates.
        """
        self.start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')])
        sha = self.git_index[item][8]
        blob = self.repo[sha]
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        parts = publish_parts(self.apply_vars(blob.as_raw_string()), writer_name='html')
        body = unicode(parts['html_body']).encode('utf8')
        return [header, self.prefix, body, self.suffix, footer]

    def render_notfound(self):
        """Return a 404 response built from the ``notfound`` template."""
        self.start_response('404 Not Found', [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = self.get_html_template(self.notfound)
        if body == '':
            body = '<h1>Not Found</h1>'
        return [header, body, footer]

    def apply_vars(self, body):
        """Substitute each ``|KEY|`` in ``body`` with ``str(environ[KEY])``."""
        for name, value in self.env.items():
            body = body.replace("|%s|" % name, str(value))
        return body

    def get_html_template(self, html):
        """Return the closest template called ``html``, or ``''`` if none.

        Walks from the directory of the current page up to the repository
        root and returns the first ``<directory>/<html>`` in the index.
        """
        path = self.page
        while True:
            current_path = path
            path = '/'.join(path.split('/')[:-1])
            if current_path == path:
                break
            # ``path`` is empty at the root, leaving a leading "/" to drop.
            # Cutting the first character unconditionally would mangle the
            # directory name of nested pages.
            item = ("%s/%s" % (path, html)).lstrip('/')
            if item in self.git_index:
                sha = self.git_index[item][8]
                return self.apply_vars(self.repo[sha].as_raw_string())
        return ''
Exemple #30
0
class Gittle(object):
    """All paths used in Gittle external methods must be paths relative to the git repository
    """

    # Fallbacks used when a caller omits the corresponding argument
    DEFAULT_COMMIT = "HEAD"
    DEFAULT_BRANCH = "master"
    DEFAULT_REMOTE = "origin"
    DEFAULT_MESSAGE = "**No Message**"
    DEFAULT_USER_INFO = {"name": None, "email": None}

    # Diff implementations selectable through the `diff_type` argument
    DIFF_FUNCTIONS = {
        "classic": utils.git.classic_tree_diff,
        "dict": utils.git.dict_tree_diff,
        "changes": utils.git.dict_tree_diff,
    }
    DEFAULT_DIFF_TYPE = "dict"

    HIDDEN_REGEXES = [
        # Hide git directory
        r".*\/\.git\/.*"
    ]

    # References
    REFS_BRANCHES = "refs/heads/"
    REFS_REMOTES = "refs/remotes/"
    REFS_TAGS = "refs/tags/"

    # Name pattern truths
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    # `0o` octal form: same value as the old `040000` literal, but also
    # valid syntax on Python 3.
    MODE_DIRECTORY = 0o40000  # Used to tell if a tree entry is a directory

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap a dulwich repository, another Gittle, or a path string.

        Extra positional/keyword arguments are forwarded to ``auth`` when
        no ready-made authenticator is passed in.
        """
        if isinstance(repo_or_path, DulwichRepo):
            repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            repo = DulwichRepo(os.path.abspath(repo_or_path))
        else:
            logging.warning("Repo is of type %s" % type(repo_or_path))
            raise Exception("Gittle must be initialized with either a dulwich repository or a string to the path")

        self.repo = repo
        self.path = repo.path

        # Remote url and optional client-activity callback
        self.origin_uri = origin_uri
        self._report_activity = report_activity

        # Ignore filter: built-in hidden patterns plus those from .gitignore
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Authenticator: use the given one or build one from the extra args
        if auth:
            self.authenticator = auth
        else:
            self.auth(*args, **kwargs)

    def report_activity(self, *args, **kwargs):
        """Forward the call to the activity callback, if one was supplied.

        Returns the callback's result, or None when no callback is set.
        """
        callback = self._report_activity
        if callback:
            return callback(*args, **kwargs)
        return None

    def _format_author(self, name, email):
        """Build a ``name <email>`` identity string."""
        template = "%s <%s>"
        return template % (name, email)

    def _format_userinfo(self, userinfo):
        """Turn a ``{"name": ..., "email": ...}`` mapping into an identity string.

        :param userinfo: mapping with ``name`` and ``email`` keys, or None
        :return: ``"name <email>"``, or None when *userinfo* is empty/None
            or either field is missing or empty
        """
        # `_commit` passes its None defaults straight through, so tolerate
        # a missing mapping instead of failing on `None.get`.
        if not userinfo:
            return None
        name = userinfo.get("name")
        email = userinfo.get("email")
        if not (name and email):
            return None
        return self._format_author(name, email)

    def _format_ref(self, base, extra):
        """Concatenate a ref prefix and a ref name."""
        parts = (base, extra)
        return "".join(parts)

    def _format_ref_branch(self, branch_name):
        """Return the full ref for a local branch (``REFS_BRANCHES`` + name)."""
        prefix = self.REFS_BRANCHES
        return self._format_ref(prefix, branch_name)

    def _format_ref_remote(self, remote_name):
        """Return the full ref for a remote name (``REFS_REMOTES`` + name)."""
        prefix = self.REFS_REMOTES
        return self._format_ref(prefix, remote_name)

    def _format_ref_tag(self, tag_name):
        """Return the full ref for a tag (``REFS_TAGS`` + name)."""
        prefix = self.REFS_TAGS
        return self._format_ref(prefix, tag_name)

    @property
    def head(self):
        """SHA of the current HEAD, as reported by the wrapped repository."""
        head_sha = self.repo.head()
        return head_sha

    @property
    def is_bare(self):
        """Value of the wrapped repository's ``bare`` flag.

        Bare repositories have no working directory or index.
        """
        repo = self.repo
        return repo.bare

    @property
    def is_working(self):
        """Negation of ``is_bare``."""
        bare = self.is_bare
        return not bare

    def has_index(self):
        """Return the wrapped repository's ``has_index()`` result."""
        repo = self.repo
        return repo.has_index()

    @property
    def has_commits(self):
        """True when HEAD can be resolved.

        A repository whose HEAD lookup raises KeyError is treated as
        having no commits.
        """
        try:
            self.repo.head()
        except KeyError:
            return False
        else:
            return True

    def ref_walker(self, ref=None):
        """Return the revision history starting at *ref* (default ``"HEAD"``)."""
        start_sha = self._commit_sha(ref or "HEAD")
        return self.repo.revision_history(start_sha)

    def branch_walker(self, branch):
        """Return the revision history of *branch* (default branch when falsy)."""
        branch_ref = self._format_ref_branch(branch or self.DEFAULT_BRANCH)
        return self.ref_walker(branch_ref)

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list of commits with all their attached information.

        :param start: index of the first commit to return
        :param end: index one past the last commit to return; a falsy
            value (None or 0) means "up to the last commit"
        :param branch: branch whose history is walked (default branch
            when falsy)
        :return: list of ``utils.git.commit_info`` results, empty when the
            repository has no commits
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        # Always slice so `start` is honoured even when no `end` is given
        return commits[start:end or None]

    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        """Pluck the ``author`` field of the first *n* commits (default 10)."""
        limit = n or 10
        history = self.commit_info(end=limit, branch=branch)
        return funky.pluck(history, "author")

    @property
    def commit_count(self):
        """Length of the history reachable from HEAD, or 0 on KeyError."""
        try:
            count = len(self.ref_walker())
        except KeyError:
            count = 0
        return count

    def commits(self):
        """Return the ``sha`` of every entry produced by ``commit_info()``."""
        history = self.commit_info()
        return [info["sha"] for info in history]

    @property
    def git_dir(self):
        """Control directory reported by the wrapped repository."""
        control_dir = self.repo.controldir()
        return control_dir

    def auth(self, *args, **kwargs):
        """Create a ``GittleAuth`` from the arguments, store it and return it."""
        authenticator = GittleAuth(*args, **kwargs)
        self.authenticator = authenticator
        return authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        """Build a ref selector mapping one branch ref to its current SHA.

        The returned callable ignores its argument and always returns
        ``{<branch ref>: <sha>}``. Raises KeyError if the branch is unknown.
        """
        name = branch_name or self.DEFAULT_BRANCH
        refs_key = self._format_ref_branch(name)
        sha = self.branches[name]

        def wants_func(old):
            return {refs_key: sha}

        return wants_func

    def _get_ignore_regexes(self):
        """Convert the repository's ``.gitignore`` lines into regexes.

        :return: the result of ``utils.paths.globers_to_regex`` for the
            right-stripped lines, or ``[]`` when there is no ``.gitignore``
        """
        gitignore_filename = os.path.join(self.path, ".gitignore")
        if not os.path.exists(gitignore_filename):
            return []
        # Context manager so the file handle is closed deterministically
        with open(gitignore_filename) as gitignore:
            globers = [line.rstrip() for line in gitignore]
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        """Absolute path of *repo_file* joined onto the repository path."""
        joined = os.path.join(self.path, repo_file)
        return os.path.abspath(joined)

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        """Path of *abspath* relative to the repository path."""
        base = self.path
        return os.path.relpath(abspath, base)

    @property
    def last_commit(self):
        """Object stored under the current HEAD SHA."""
        head_sha = self.repo.head()
        return self[head_sha]

    @property
    def index(self):
        """Open the repository index; every access calls ``open_index()`` anew."""
        opened = self.repo.open_index()
        return opened

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Create a repository at *path* (bare when *bare* is truthy) and wrap it.

        Remaining arguments are passed to the class constructor.
        """
        mkdir_safe(path)
        make_repo = DulwichRepo.init_bare if bare else DulwichRepo.init
        dulwich_repo = make_repo(path)
        return cls(dulwich_repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        """Same as ``init`` but with ``bare`` defaulting to True."""
        if "bare" not in kwargs:
            kwargs["bare"] = True
        return cls.init(*args, **kwargs)

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for *origin_uri* or the stored remote.

        Client options are the authenticator's kwargs, overridden by
        *kwargs*, with ``report_activity`` always set to this instance's
        reporter. Raises ``InvalidRemoteUrl`` when no remote url is known.
        """
        uri = origin_uri or self.origin_uri
        if not uri:
            raise InvalidRemoteUrl()

        options = {}
        options.update(self.authenticator.kwargs())
        options.update(kwargs)
        options["report_activity"] = self.report_activity

        client, remote_path = get_transport_and_path(uri, **options)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None, progress_stderr=None):
        """Send the pack for one branch to *origin_uri* and return the client's result."""
        selector = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri, progress_stderr=progress_stderr)
        pack_generator = self.repo.object_store.generate_pack_contents
        return client.send_pack(remote_path, selector, pack_generator, progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None, progress_stderr=None):
        """Like ``git push``: delegate to ``push_to`` with the same arguments."""
        result = self.push_to(origin_uri, branch_name, progress, progress_stderr)
        return result

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Delete the whole repository directory, re-init it, then pull.

        Destructive: everything under ``self.path`` is removed first.
        """
        repo_path = self.path
        rmtree(repo_path)
        mkdir_safe(repo_path)
        self.repo = DulwichRepo.init(repo_path)

        # Fetch a brand new copy from the remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        """Fetch from *origin_uri*; *branch_name* is accepted but not used."""
        result = self.fetch(origin_uri)
        return result

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        """Like ``git pull``: delegate to ``pull_from``."""
        result = self.pull_from(origin_uri, branch_name)
        return result

    def fetch_remote(self, origin_uri=None):
        """Fetch from the remote into this repository and return the remote refs."""
        client, remote_path = self.get_client(origin_uri=origin_uri)
        return client.fetch(remote_path, self.repo)

    def _setup_fetched_refs(self, refs, origin, bare):
        """Import fetched branches and tags, then point HEAD at the remote HEAD.

        Branches go under ``refs/remotes/<origin>``, or ``refs/heads`` for
        a bare repository; tags always go under ``refs/tags``.
        """
        fetched_tags = utils.git.subrefs(refs, "refs/tags")
        fetched_heads = utils.git.subrefs(refs, "refs/heads")

        cleaned_tags = utils.git.clean_refs(fetched_tags)
        cleaned_heads = utils.git.clean_refs(fetched_heads)

        remote_base = "refs/remotes/" + origin
        heads_base = "refs/heads" if bare else remote_base

        self.import_refs(heads_base, cleaned_heads)
        self.import_refs("refs/tags", cleaned_tags)

        self["HEAD"] = refs["HEAD"]

    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote, update refs, then refresh the local state.

        Non-bare repositories get a full checkout; bare ones get their
        server info updated instead.
        """
        is_bare = bare or False
        remote_name = origin or self.DEFAULT_REMOTE

        remote_refs = self.fetch_remote(origin_uri)

        # Update refs (branches, tags, HEAD)
        self._setup_fetched_refs(remote_refs, remote_name, is_bare)

        if is_bare:
            self.update_server_info()
        else:
            self.checkout_all()

    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository into *local_path* and return the wrapper.

        The directory is always created; the *mkdir* flag is not consulted.
        """
        mkdir_safe(local_path)

        # Initialize the local repository
        initializer = cls.init_bare if bare else cls.init
        local_repo = initializer(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)
        repo.fetch(bare=bare)

        # TODO : add origin
        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as ``clone`` but with ``bare`` defaulting to True."""
        if "bare" not in kwargs:
            kwargs["bare"] = True
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):
        """Create a commit through dulwich's ``do_commit``.

        Without an explicit *tree*, the given *files* (or the modified
        files when none are given) are staged first.
        """
        if not tree:
            to_stage = files or self.modified_files
            logging.warning("STAGING : %s" % to_stage)
            self.add(to_stage)

        commit_message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=commit_message,
            author=author_msg,
            committer=committer_msg,
            encoding="UTF-8",
            tree=tree,
            *args,
            **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat list of files as blobs plus one tree; return the tree id.

        :param structure: iterable of mappings with ``data``, ``name`` and
            ``mode`` keys; ``data`` and ``name`` must be ASCII-encodable
        :return: id of the stored tree

        Entries that lack a key or cannot be encoded are skipped.
        """
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:
            try:
                data = file_info["data"].encode("ascii")
                name = file_info["name"].encode("ascii")
                mode = file_info["mode"]
            except (KeyError, TypeError, AttributeError, UnicodeError):
                # Skip malformed entries, but let unrelated errors
                # (KeyboardInterrupt, SystemExit, ...) propagate.
                continue

            blob = Blob()
            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    # Like: git commmit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        """Commit with the same identity used as both author and committer."""
        identity = {"name": name, "email": email}
        return self._commit(committer=identity, author=identity, message=message, files=files, *args, **kwargs)

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Commit a tree built directly from *structure* (no staging involved).

        Intended for committing straight into bare repositories, e.g. an
        initial commit. Returns None without committing when *structure*
        is empty.
        """
        if not structure:
            return
        tree_id = self._tree_from_structure(structure)
        identity = {"name": name, "email": email}
        return self._commit(committer=identity, author=identity, message=message, tree=tree_id, *args, **kwargs)

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        """Push first, then pull, and return the pull result."""
        self.push(origin_uri)
        pulled = self.pull(origin_uri)
        return pulled

    def lookup_entry(self, relpath, trackable_files=None):
        """Return ``(blob sha, file mode)`` for a working-tree file.

        :param relpath: path relative to the repository
        :param trackable_files: container of paths that may be looked up;
            None behaves like an empty container
        :return: tuple of the git blob SHA-1 hex digest of the file's
            contents and its ``st_mode``
        :raises KeyError: when *relpath* is not in *trackable_files*
        """
        if trackable_files is None or relpath not in trackable_files:
            raise KeyError(relpath)

        abspath = self.abspath(relpath)

        with open(abspath, "rb") as git_file:
            data = git_file.read()

        digest = sha1()
        # Encode the blob header so hashing also works where text and
        # byte strings are distinct types.
        digest.update(("blob %u\0" % len(data)).encode("ascii"))
        digest.update(data)
        return (digest.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        """Every path yielded by iterating the index."""
        return [path for path in self.index]

    @property
    @funky.transform(set)
    def raw_files(self):
        """Unfiltered result of ``utils.paths.subpaths`` for the repository path."""
        root = self.path
        return utils.paths.subpaths(root)

    @property
    @funky.transform(set)
    def ignored_files(self):
        """Result of ``utils.paths.subpaths`` with this instance's filters applied."""
        root = self.path
        return utils.paths.subpaths(root, filters=self.filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        """``raw_files`` minus ``ignored_files``."""
        everything = self.raw_files
        return everything - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        """``trackable_files`` minus ``tracked_files``."""
        candidates = self.trackable_files
        return candidates - self.tracked_files

    """
    @property
    @funky.transform(set)
    def modified_staged_files(self):
        "Checks if the file has changed since last commit"
        timestamp = self.last_commit.commit_time
        index = self.index
        return [
            f
            for f in self.tracked_files
            if index[f][1][0] > timestamp
        ]
    """

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        """List the differences between the trackable files and the tree of *ref*.

        Each item has the shape
        ``((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha))``.
        Returns ``[]`` when the repository has no commits.
        """
        commit_ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []

        object_store = self.repo.object_store
        tree_id = self[commit_ref].tree
        names = self.trackable_files
        lookup = partial(self.lookup_entry, trackable_files=names)

        return list(changes_from_tree(names, lookup, object_store, tree_id, want_unchanged=False))

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        """Paths of changed entries whose (old, new) name presence equals *pattern*.

        *pattern* is one of the ``PATTERN_*`` tuples; entries without a
        truthy name are dropped.
        """
        matching_paths = []
        for names, modes, sha in self._changed_entries():
            if tuple(map(bool, names)) != pattern:
                continue
            path = funky.first_true(names)
            if path:
                matching_paths.append(path)
        return matching_paths

    @property
    @funky.transform(set)
    def removed_files(self):
        """Changed entries matching ``PATTERN_REMOVED``, minus ignored files."""
        removed = self._changed_entries_by_pattern(self.PATTERN_REMOVED)
        return removed - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        """Changed entries matching ``PATTERN_ADDED``, minus ignored files."""
        added = self._changed_entries_by_pattern(self.PATTERN_ADDED)
        return added - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        """Changed entries matching ``PATTERN_MODIFIED``, minus ignored files."""
        modified = self._changed_entries_by_pattern(self.PATTERN_MODIFIED)
        return modified - self.ignored_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        """Tracked files whose mtime is later than the last commit's time."""
        commit_time = self.last_commit.commit_time
        touched = []
        for tracked in self.tracked_files:
            if os.stat(self.abspath(tracked)).st_mtime > commit_time:
                touched.append(tracked)
        return touched

    @property
    def pending_files(self):
        """Union of the modified, added and removed files."""
        modified = self.modified_files
        added = self.added_files
        removed = self.removed_files
        return modified | added | removed

    @property
    def pending_files_by_state(self):
        """Map each pending path to ``"modified"``, ``"added"`` or ``"removed"``."""
        files = {"modified": self.modified_files, "added": self.added_files, "removed": self.removed_files}

        # Invert state -> paths into path -> state
        by_path = {}
        for state, paths in files.items():
            for path in paths:
                by_path[path] = state
        return by_path

    """
    @property
    @funky.transform(set)
    def modified_files(self):
        return self.modified_staged_files | self.modified_unstaged_files
    """

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        """Like ``git add``: hand *files* to the wrapped repository's ``stage``."""
        repo = self.repo
        return repo.stage(files)

    def add(self, *args, **kwargs):
        """Alias of ``stage``."""
        staged = self.stage(*args, **kwargs)
        return staged

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Like ``git rm``: drop *files* from the index and write it back.

        :param files: paths to remove; those not in the index are ignored
        :param force: accepted for compatibility; not used
        :return: the result of the index's ``write()``
        """
        # `self.index` opens a fresh index on every access, so all
        # deletions must go through the same object that gets written.
        index = self.index
        for tracked in files:
            if tracked in index:
                del index[tracked]
        return index.write()

    def mv_fs(self, file_pair):
        """Rename a file on disk; *file_pair* is ``(old_name, new_name)``."""
        source, destination = file_pair
        os.rename(source, destination)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Like ``git mv``: rename tracked files on disk and restage both names.

        Only pairs whose old name is in the index are processed.
        """
        index = self.index
        tracked_pairs = [pair for pair in files_pair if pair[0] in index]
        for pair in tracked_pairs:
            self.mv_fs(pair)
        old_files = [funky.first(pair) for pair in tracked_pairs]
        new_files = [funky.last(pair) for pair in tracked_pairs]
        self.add(new_files)
        self.rm(old_files)
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        """Call ``build_index_from_tree`` for *tree* on this repository's path and index."""
        repo = self.repo
        return build_index_from_tree(repo.path, repo.index_path(), repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        """Check out the full tree of *commit_sha* (HEAD when omitted)."""
        target_sha = commit_sha or self.head
        # Rebuild the index from that commit's tree
        return self._checkout_tree(self._commit_tree(target_sha))

    def checkout(self, commit_sha=None, files=None):
        """Checkout only a select amount of files

        Not implemented yet: the arguments are only normalised and the
        instance itself is returned. Resolving the default still reads
        ``self.head``.
        """
        commit_sha = commit_sha or self.head
        files = files or []

        return self

    @funky.arglist_method
    def reset(self, files, commit="HEAD"):
        """Placeholder for ``git reset``; currently does nothing."""
        pass

    def rm_all(self):
        """Remove every entry from the index and write it back.

        :return: the result of the index's ``write()``
        """
        # `self.index` opens a fresh index on every access; clear and
        # write the same object so the change is actually persisted.
        index = self.index
        index.clear()
        return index.write()

    def _to_commit(self, commit_obj):
        """Resolve a string to a repository object; pass other values through.

        Lets callers hand in either a SHA or a dulwich Commit object.
        """
        if not isinstance(commit_obj, basestring):
            return commit_obj
        return self.repo[commit_obj]

    def _commit_sha(self, commit_obj):
        """Return the SHA for a SHA, a ref string or a commit object."""
        if utils.git.is_sha(commit_obj):
            return commit_obj
        if isinstance(commit_obj, basestring):
            # Go through self.repo, not self[...], to avoid infinite recursion
            resolved = self.repo[commit_obj]
        else:
            resolved = commit_obj
        return resolved.id

    def _blob_data(self, sha):
        """Return the ``data`` attribute of the object stored under *sha*."""
        blob = self[sha]
        return blob.data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """Follow first parents *n* times (default 1) and return the SHA reached.

        Stops early at a commit without parents. Note that a parent is
        not necessarily the chronologically previous commit.
        """
        remaining = 1 if n is None else n
        current = self._to_commit(commit)
        parents = current.parents

        while remaining > 0 and parents:
            current = self._to_commit(self[parents[0]])
            parents = current.parents
            remaining -= 1

        return self._commit_sha(current)

    def get_previous_commit(self, commit_ref, n=None):
        """Return the entry *n* (default 1) steps after *commit_ref* in ``commits()``.

        Falls back to the resolved SHA of *commit_ref* itself when
        ``funky.next`` uses its default.
        """
        commit_sha = self._parse_reference(commit_ref)
        steps = n or 1
        history = self.commits()
        return funky.next(history, commit_sha, n=steps, default=commit_sha)

    def _parse_reference(self, ref_string):
        """Resolve a reference, including the ``REF~N`` form, to a SHA."""
        if "~" not in ref_string:
            return self._commit_sha(ref_string)

        # COMMIT_REF~x
        ref, count = ref_string.split("~")
        count = int(count)
        base_sha = self._commit_sha(ref)
        return self.get_previous_commit(base_sha, count)

    def _commit_tree(self, commit_sha):
        """Return the ``tree`` attribute of the commit stored under *commit_sha*."""
        commit = self[commit_sha]
        return commit.tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff *commit_sha* against *compare_to*.

        :param commit_sha: the newer commit
        :param compare_to: the older commit; defaults to the result of
            ``get_previous_commit(commit_sha)``
        :param diff_type: key into ``DIFF_FUNCTIONS`` (default
            ``DEFAULT_DIFF_TYPE``)
        :param filter_binary: forwarded to the diff function
        :return: whatever the selected diff function returns
        :raises KeyError: for an unknown *diff_type*
        """
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary; it used to be silently dropped here
        return self._diff_between(compare_to, commit_sha, diff_function=diff_func, filter_binary=filter_binary)

    def diff_working(self, ref=None, filter_binary=True):
        """Diff the working directory against *ref* (``DEFAULT_COMMIT`` when omitted)."""
        changed = self._changed_entries(ref=ref)
        return utils.git.diff_changes_paths(self.repo.object_store, self.path, changed, filter_binary=filter_binary)

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Collect the files of a commit (or tree) into a dict keyed by path.

        Each value is a dict with the keys ``name``, ``path``, ``mode``,
        ``sha`` and ``data``. Directories are descended recursively. When
        *paths* is given, only files whose path is in it are included.

        When *is_tree* is truthy, *commit_sha* is treated as a tree id;
        *parent_path* is the prefix joined in front of every entry name.
        """
        base_path = parent_path or ""

        if is_tree:
            tree = self[commit_sha]
        else:
            tree = self[self._commit_tree(commit_sha)]

        collected = {}
        for mode, path, sha in tree.entries():
            full_path = os.path.join(base_path, path)

            if mode == self.MODE_DIRECTORY:
                # Directory entry: merge in the files of the subtree
                collected.update(self.get_commit_files(sha, parent_path=full_path, is_tree=True, paths=paths))
            elif paths is None or full_path in paths:
                collected[full_path] = {
                    "name": path,
                    "path": full_path,
                    "mode": mode,
                    "sha": sha,
                    "data": self._blob_data(sha),
                }
        return collected

    def file_versions(self, path):
        """Return the file info of each distinct version of *path* in the history.

        Commits are visited in ``commit_info()`` order; a version is
        reported the first time its blob SHA is seen. The matching commit
        dict also gets the file info stored under its ``"file"`` key.
        """
        versions = []
        seen_shas = set()

        for commit in self.commit_info():
            try:
                files = self.get_commit_files(commit["sha"], paths=[path])
                file_path, file_data = list(files.items())[0]
            except IndexError:
                # File not present in this commit
                continue

            file_sha = file_data["sha"]
            if file_sha not in seen_shas:
                seen_shas.add(file_sha)
                commit["file"] = file_data
                versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits.

        Please use ``diff`` unless you have very specific needs.

        :param old_commit_sha: older commit
        :param new_commit_sha: newer commit
        :param diff_function: callable taking
            ``(object_store, old_tree, new_tree, filter_binary=...)``;
            defaults to the ``DEFAULT_DIFF_TYPE`` entry of ``DIFF_FUNCTIONS``
        :param filter_binary: forwarded to *diff_function*
        :return: whatever *diff_function* returns
        """
        # The None default used to be called directly, raising TypeError
        if diff_function is None:
            diff_function = self.DIFF_FUNCTIONS[self.DEFAULT_DIFF_TYPE]

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """Run ``diff`` with ``diff_type`` forced to ``"changes"``.

        Described by the original author as a list of
        ``[(oldpath, newpath), (oldmode, newmode), (oldsha, newsha)]``
        entries.
        """
        kwargs.update(diff_type="changes")
        return self.diff(*args, **kwargs)

    def changes_count(self, *args, **kwargs):
        """Number of items returned by ``changes`` for the same arguments."""
        found = self.changes(*args, **kwargs)
        return len(found)

    def _refs_by_pattern(self, pattern):
        """Return refs starting with *pattern*, keyed by the name after it."""
        prefix_length = len(pattern)
        return {
            key[prefix_length:]: value
            for key, value in self.refs.items()
            if key.startswith(pattern)
        }

    @property
    def refs(self):
        """Refs as returned by the wrapped repository's ``get_refs()``."""
        all_refs = self.repo.get_refs()
        return all_refs

    def set_refs(self, refs_dict):
        """Assign every ``name -> value`` pair of *refs_dict* on the repository.

        :param refs_dict: mapping of keys to the values stored via
            ``self.repo[key] = value``
        """
        # `self` was missing from the signature, so the method could not
        # be called on an instance at all.
        for name, value in refs_dict.items():
            self.repo[name] = value

    def import_refs(self, base, other):
        """Delegate to the ref container's ``import_refs(base, other)``."""
        ref_container = self.repo.refs
        return ref_container.import_refs(base, other)

    @property
    def branches(self):
        """Refs under ``REFS_BRANCHES``, keyed by branch name."""
        prefix = self.REFS_BRANCHES
        return self._refs_by_pattern(prefix)

    def _active_branch(self, refs=None, head=None):
        """Return ``(branch, sha)`` for a branch whose head equals *head*.

        *head* defaults to the current HEAD and *refs* to the local
        branches. ``(None, None)`` is returned when no branch matches.
        """
        head = head or self.head
        refs = refs or self.branches
        try:
            # First item of the filtered dict; with several matching
            # branches the pick depends on dict ordering
            return {branch: branch_head for branch, branch_head in refs.items() if branch_head == head}.items()[0]
        except IndexError:
            pass
        return (None, None)

    @property
    def active_branch(self):
        """Branch name part of ``_active_branch()``."""
        branch, _sha = self._active_branch()
        return branch

    @property
    def active_sha(self):
        """SHA part of ``_active_branch()``."""
        _branch, sha = self._active_branch()
        return sha

    @property
    def remote_branches(self):
        """Refs under ``REFS_REMOTES``, keyed by the name after the prefix."""
        prefix = self.REFS_REMOTES
        return self._refs_by_pattern(prefix)

    @property
    def tags(self):
        """Refs under ``REFS_TAGS``, keyed by tag name."""
        prefix = self.REFS_TAGS
        return self._refs_by_pattern(prefix)

    @property
    def remotes(self):
        """Map each configured remote name to its url.

        Built from the config sections whose first key element is
        ``"remote"``, e.g. ``{'origin': 'http://host/user/repo.git'}``.
        """
        config = self.repo.get_config()
        urls = {}
        for keys, values in config.items():
            if keys[0] == "remote":
                urls[keys[1]] = values["url"]
        return urls

    def add_ref(self, new_ref, old_ref):
        """Point *new_ref* at the value of *old_ref*, then update server info."""
        target = self.repo.refs[old_ref]
        self.repo.refs[new_ref] = target
        self.update_server_info()

    def remove_ref(self, ref_name):
        """Delete *ref_name*; return False when it does not exist, True otherwise."""
        if ref_name in self.repo.refs:
            del self.repo.refs[ref_name]
            self.update_server_info()
            return True
        return False

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Create *new_branch* from a local branch or, failing that, a remote one.

        :param base_branch: name of the branch to fork
        :param new_branch: name of the branch to create
        :param tracking: remote searched for *base_branch* when no local
            branch of that name exists (default ``DEFAULT_REMOTE``)
        :return: the full ref of the new branch
        :raises Exception: when *new_branch* already exists or
            *base_branch* is found neither locally nor on *tracking*
        """
        # Honour the caller's remote; it used to be overwritten here
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Ref names always use "/", whatever the platform path separator
        remote_branch = "/".join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception(
                "Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)
            )

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def remove_branch(self, branch_name):
        """Remove the local branch ref; returns what ``remove_ref`` returns."""
        branch_ref = self._format_ref_branch(branch_name)
        return self.remove_ref(branch_ref)

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Change the current branch.

        :param branch_name: branch to switch to
        :param tracking: remote passed to ``create_branch`` when the
            branch has to be created
        :param create: create the branch when it does not exist
            (default True)
        :raises Exception: when the branch does not exist and *create*
            is false
        """
        if create is None:
            create = True

        # Create the branch only when allowed; the flag used to be ignored
        if branch_name not in self.branches:
            if not create:
                raise Exception("branch %s does not exist" % branch_name)
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref("HEAD", branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def clean(self, force=None, directories=None):
        """Delete every untracked file and return the set that was removed.

        *force* and *directories* are accepted but not used.
        NOTE(review): paths are passed to ``os.remove`` as is; confirm
        they resolve correctly when the cwd is not the repository.
        """
        untracked_files = self.untracked_files
        for untracked in untracked_files:
            os.remove(untracked)
        return untracked_files

    def clean_working(self):
        """Purge the working directory by delegating to ``clean``.

        Called by ``switch_branch`` before the new branch is checked out.
        """
        removed = self.clean()
        return removed

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        """Build a nested dict describing the tree stored under *tree_sha*.

        Directory entries map to nested dicts, other entries to their
        SHA. ``"."`` holds the tree's own SHA and ``".."`` the parent's
        (or its own at the top). *depth* limits recursion; None means
        ``MAX_TREE_DEPTH`` and 0 yields an empty dict.
        """
        tree = self[tree_sha]
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        if depth == 0:
            return {}

        structure = {}
        for mode, path, sha in tree.entries():
            if mode == self.MODE_DIRECTORY:
                # Sub-tree: recurse one level deeper
                structure[path] = self._get_fs_structure(sha, depth=depth - 1, parent_sha=tree_sha)
            else:
                structure[path] = sha
        structure["."] = tree_sha
        structure[".."] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        """Return the part of the tree structure found under *path*.

        The structure is built one level deeper than the number of path
        components and then indexed with ``funky.subkey``.
        """
        parts = path.split(os.path.sep)
        structure = self._get_fs_structure(tree_sha, depth=len(parts) + 1)
        return funky.subkey(structure, parts)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" of the commit *ref*, using that commit's tree.

        An empty *subpath* or one of ``ROOT_PATHS`` lists the root at
        depth 1; anything else is looked up by path.
        """
        tree_sha = self._commit_tree(ref)

        is_root = subpath in self.ROOT_PATHS or not subpath
        if is_root:
            return self._get_fs_structure(tree_sha, depth=1)
        return self._get_fs_structure_by_path(tree_sha, subpath)

    def commit_file(self, ref, path):
        """Return the file info dict of *path* in commit *ref*.

        Raises IndexError when the commit has no such file.
        """
        matches = self.get_commit_files(ref, paths=[path])
        name, info = list(matches.items())[0]
        return info

    def commit_tree(self, ref, *args, **kwargs):
        """Return the tree structure of commit *ref*.

        Extra arguments are passed on to ``_get_fs_structure``.
        """
        root_tree = self._commit_tree(ref)
        return self._get_fs_structure(root_tree, *args, **kwargs)

    def update_server_info(self):
        """Run the module-level ``update_server_info`` for bare repositories only."""
        if self.is_bare:
            update_server_info(self.repo)

    def _is_fast_forward(self):
        """Placeholder; not implemented, always returns None."""
        pass

    def _merge_fast_forward(self):
        """Placeholder; not implemented, always returns None."""
        pass

    def __hash__(self):
        """Hash by repository path.

        Required, otherwise the memoize function will just mess it up.
        """
        repo_path = self.path
        return hash(repo_path)

    def __getitem__(self, key):
        """Resolve *key* via ``_parse_reference`` and return the stored object."""
        resolved_sha = self._parse_reference(key)
        return self.repo[resolved_sha]

    def __setitem__(self, key, value):
        """Store *value* under *key* on the wrapped repository."""
        repo = self.repo
        repo[key] = value

    # Aliases
    fork = clone_bare  # Alias to clone_bare
    log = commit_info
    diff_count = changes_count
    contributors = recent_contributors
    # Misspelled alias kept so existing callers keep working
    comtributors = recent_contributors
Exemple #31
0
class Wiki(HookMixin):
    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the git repository at ``path``, initialising it if needed.

        :param path: Location of the wiki's git repository.
        """
        # Function-scope import so this method does not rely on a module-level
        # ``import os`` being present.
        import os

        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # ``mkdir=True`` asks ``Repo.init`` to create ``path`` itself,
            # which fails if the directory already exists (e.g. an empty
            # folder that was never initialised). Only request it when the
            # directory is actually missing.
            self.repo = Repo.init(path, mkdir=not os.path.isdir(path))

        self.path = path

    def __repr__(self):
        """Return a short label of the form ``Wiki: <path>``."""
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        Text values for ``name``, ``email`` and ``message`` are encoded as
        UTF-8; byte strings are used unchanged. Author and committer are the
        same ``name <email>`` identity.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``self.repo.do_commit`` returns
        """
        try:
            text_type = unicode  # noqa: F821 -- Python 2
        except NameError:
            text_type = str  # Python 3

        def to_utf8(value):
            # Only text is encoded; anything else passes through untouched.
            if isinstance(value, text_type):
                return value.encode('utf-8')
            return value

        name = to_utf8(name)
        email = to_utf8(email)
        message = to_utf8(message)

        # Bytes template: identical to a plain str on Python 2, and keeps the
        # identity a byte string on Python 3 as well.
        author = committer = b"%s <%s>" % (name, email)
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    # Declared without ``self``: as a plain method it could never receive
    # ``(commit, approved_by)`` through an instance. ``staticmethod`` keeps the
    # signature as written and makes both ``Wiki.push(...)`` and
    # ``wiki.push(...)`` callable.
    @staticmethod
    def push(commit, approved_by):
        """Pull, rebase on top of ``commit`` and push it to ``origin``'s master.

        The git commands run in the current working directory of the process
        (no ``cwd`` is passed to ``subprocess``). The output of every command
        is printed.

        :param commit: Commit (sha or ref name) to rebase on and to push
        :param approved_by: Currently unused
        :return: None
        :raises subprocess.CalledProcessError: if a git command exits non-zero
        """
        print(commit)

        steps = (
            ["git", "pull"],
            # Every argument must be its own list item; "rebase -i <commit>"
            # as one item is rejected by git as an unknown command.
            # ``sequence.editor=true`` accepts the default todo list without
            # opening an editor, replacing the former attempt to execute the
            # editor keystroke ":x" as a program.
            ["git", "-c", "sequence.editor=true", "rebase", "-i", commit],
            ["git", "push", "origin", commit + ":master"],
        )
        for argv in steps:
            output = subprocess.check_output(argv)
            print(output)

    def get_page(self, name, sha='HEAD'):
        """Return a ``WikiPage`` for the named page at the given commit.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: WikiPage -- built from ``name``, this wiki and ``sha``

        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Describe every entry of the repository index.

        :return: list -- one dict per index entry with the keys ``name``,
            ``filename``, ``ctime``, ``mtime``, ``sha`` and ``size``

        """
        index = self.repo.open_index()

        def describe(filename):
            cname = filename_to_cname(filename)
            entry = index[filename]
            return {
                'name': cname,
                'filename': filename,
                # Only the first element of the entry's ctime / mtime is kept.
                'ctime': entry.ctime[0],
                'mtime': entry.mtime[0],
                'sha': entry.sha,
                'size': entry.size,
            }

        return [describe(filename) for filename in index]