Example 1
def get_recent_tags(projdir=PROJDIR):
    """
    Get list of recent tags in order from newest to oldest and their datetimes.

    :param projdir: path to ``.git``
    :returns: list of (tag, [datetime, commit, author]) sorted from new to old
    """
    project = Repo(projdir)  # dulwich repository object
    refs = project.get_refs()  # dictionary of refs and their SHA-1 values
    tags = {}  # empty dictionary to hold tags, commits and datetimes
    # iterate over refs in repository
    for key, value in refs.iteritems():
        obj = project.get_object(value)  # dulwich object from SHA-1
        # check if object is tag
        if obj.type_name != 'tag':
            # skip ref if not a tag
            continue
        # strip the leading text from "refs/tags/<tag name>" to get "<tag name>"
        _, tag = key.rsplit('/', 1)
        # check that the tag object points to a commit, although it should always be true
        if obj.object[0].type_name == 'commit':
            commit = project.get_object(obj.object[1])  # commit object
            # get tag commit datetime, but dulwich returns seconds since
            # beginning of epoch, so use Python time module to convert it to
            # timetuple then convert to datetime
            tags[tag] = [
                datetime.datetime(*time.gmtime(commit.commit_time)[:6]),
                commit.id,
                commit.author
            ]
            
    # return list of tags sorted by their datetimes from newest to oldest
    return sorted(tags.iteritems(), key=lambda tag: tag[1][0], reverse=True)
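
A minimal usage sketch for the function above, assuming the same imports and a placeholder repository path:

for tag, meta in get_recent_tags('/path/to/project/.git'):
    tagged_on, commit_id, author = meta
    print("%s tagged %s by %s (%s)" % (tag, tagged_on, author, commit_id))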
Example 2
class ArticleList(object):
    def __init__(self):
        self.repo = Repo('wiki')
        self.head = self.repo.get_object(self.repo.head())
        self.tree = self.repo.get_object(self.head.tree)

    def get_article_titles(self):
        return [a for a in self.tree]
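
A minimal usage sketch, assuming a git repository named "wiki" exists in the working directory as the class expects:

article_list = ArticleList()
print(article_list.get_article_titles())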
Example 3
class Article(object):
    def __init__(self, title):
        self.title = title.encode('UTF-8')
        self.repo = Repo('wiki')

        self.head = self.repo.get_object(self.repo.head())
        self.tree = self.repo.get_object(self.head.tree)

        try:
            sha = self.tree[self.title][1]
            self.content = self.repo[sha].data
        except KeyError:
            self.content = u'This space intentionally left blank.'

    def __str__(self):
        return self.title

    def get_content(self):
        return self.content

    def get_title(self):
        return self.title

    def update_content(self, new_content, author, email, message):
        new_content = new_content.encode('UTF-8')
        author = author.encode('UTF-8')
        message = message.encode('UTF-8')
        email = email.encode('UTF-8')

        # create blob, add to existing tree
        blob = Blob.from_string(new_content)
        self.tree[self.title] = (0100644, blob.id)

        # commit
        commit = Commit()
        commit.tree = self.tree.id
        commit.parents = [self.head.id]
        commit.author = commit.committer = "%s <%s>" % (author, email)
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('+0100')[0]  # FIXME: get proper timezone
        commit.commit_timezone = commit.author_timezone = tz
        commit.encoding = 'UTF-8'
        commit.message = message

        # save everything
        object_store = self.repo.object_store
        object_store.add_object(blob)
        object_store.add_object(self.tree)
        object_store.add_object(commit)

        self.repo.refs['refs/heads/master'] = commit.id

    def update_title(self, new_title):
        pass
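
A minimal usage sketch for the Article class above; the title, author, e-mail and message are made-up values, and the "wiki" repository plus the snippet's imports (Repo, Blob, Commit, parse_timezone, time) are assumed:

article = Article(u'HomePage')
print(article.get_content())
article.update_content(u'Hello, wiki!', u'Alice', u'alice@example.com', u'update HomePage')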
Example 4
    def get_commit_file_diffs(repo_path, max_commit_count=-1):
        repo = Repo(repo_path)
        prev = None
        walker = repo.get_graph_walker()

        commit_changes = []
        commit_count = 0

        cset = walker.next()
        while cset is not None:
            commit = repo.get_object(cset)
            if prev is None:
                prev = commit.tree
                cset = walker.next()
                continue

            this_commit_changes = []

            for x in tree_changes(repo, prev, commit.tree):
                if x.old.path is not None:
                    this_commit_changes.append(x.old.path)

            commit_changes.append(this_commit_changes)

            prev = commit.tree

            commit_count += 1

            if max_commit_count > 0 and commit_count >= max_commit_count:
                cset = None
            else:
                cset = walker.next()

        return RepoDiffResult(repo_path, commit_changes, commit_count)
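
The same idea can also be sketched with dulwich's higher-level Repo.get_walker(); this is an assumption-laden rewrite for comparison, not part of the original example:

from dulwich.repo import Repo

def changed_paths_per_commit(repo_path, max_commit_count=None):
    # Walk history from HEAD and collect the old paths touched by each commit.
    repo = Repo(repo_path)
    per_commit = []
    for entry in repo.get_walker():
        changes = entry.changes()
        # For merge commits changes() yields a list of lists; skip those here.
        if changes and isinstance(changes[0], list):
            continue
        per_commit.append([c.old.path for c in changes if c.old.path is not None])
        if max_commit_count is not None and len(per_commit) >= max_commit_count:
            break
    return per_commit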
Example 5
 def get_last_commit(self):
     try:
         repo = Repo(os.path.dirname(self.path))
     except NotGitRepository:
         repo = Repo.init(os.path.dirname(self.path))
     head = repo.head()
     head_commit = repo.get_object(head)
     return head_commit
Example 6
def get_repo_object(repo: Repo, object_name: Union[str, bytes]) -> Commit:
    if isinstance(object_name, str):
        object_name = object_name.encode()

    gotten_object = repo.get_object(object_name)
    if isinstance(gotten_object, Commit):
        return gotten_object

    raise RuntimeError(f"Got non-commit object {gotten_object}")
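
A minimal usage sketch, assuming a git repository in the current directory:

from dulwich.repo import Repo

repo = Repo(".")
head_commit = get_repo_object(repo, repo.head())  # repo.head() returns the HEAD commit SHA
print(head_commit.message)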
Example 7
    def view_file(self, *virtual_path, **keywords):
        branch = "master"
        desired_view = "default"
        sha = None
        if "sha" in keywords: sha = keywords["sha"]
        elif hasattr(self, "sha"): sha = self.sha
        if "branch" in keywords: branch = keywords["branch"]
        if "desired_view" in keywords: desired_view = keywords["desired_view"]
        if self.package is None: raise cherrypy.NotFound()
        if not (desired_view in self.acceptable_views): raise cherrypy.NotFound()

        #create the dulwich.repo.Repo object
        repo = Repo(self.package.path())

        #switch to the right refspec
        set_head(repo, branch)
        
        #reconstruct the filename
        filename = self.filename

        #get the sha if it wasn't passed to us
        try:
            if not sha: sha = dulwich.object_store.tree_lookup_path(repo.get_object, repo.get_object(repo.ref("refs/heads/" + branch)).tree, filename)[1]
            obj = repo.get_object(sha)
        except IndexError: raise cherrypy.NotFound()
        except KeyError: raise cherrypy.NotFound()
        
        output = str(obj.as_pretty_string())
        
        #determine the MIME type
        mime_type = "text/plain"
        #try:
        #    if filename.count(".") > 0:
        #        extension = filename.split(".")[-1]
        #        mime_type = mimetypes.types_map["." + extension]
        #except KeyError:
        #    mime_type = "text/plain"

        #set the MIME type
        #cherrypy.response.headers["Content-Type"] = mime_type
        self.content = "<pre>" + output + "</pre>"
        self.branch = branch
        return self.respond()
Example 8
def git_commit_info():
    git = Repo('.')
    commit = git.get_object(git.head())
    return {
        'id': commit.id.decode("utf-8")[0:7],
        'id_full': commit.id.decode("utf-8"),
        'author': regex.findall("(.*?) <(.*?)>",
                                commit.author.decode("utf-8"))[0],
        'message': commit.message.decode("utf-8").strip('\r\n').split('\n')[0]
    }
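
A minimal usage sketch; it assumes the function above runs from inside a git checkout:

info = git_commit_info()
print("%s by %s: %s" % (info['id'], info['author'][0], info['message']))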
Example 9
class ManifestWalker:
    """
    Walk all branches for a manifest repository and return key info
    and the contents of each commit; this walker moves forward in
    Git history
    """
    def __init__(self, manifest_dir, latest_sha):
        """Initialize the repository connection and encode latest SHAs"""

        self.repo = Repo(manifest_dir)
        self.latest_sha = [sha.encode('utf-8') for sha in latest_sha]

    def walk(self):
        """
        Find all branches and do a full walk from a given commit,
        history forward, returning key information and contents
        of each commit
        """

        branches = [
            self.repo.get_object(self.repo.refs[ref])
            for ref in self.repo.refs.keys() if ref.startswith(b'refs/remotes')
        ]

        walker = self.repo.get_walker(include=list(
            set([branch.id for branch in branches])),
                                      exclude=self.latest_sha,
                                      reverse=True)

        for entry in walker:
            changes = entry.changes()

            # Skip any commit that doesn't have exactly one change
            # (Zero is a merge commit, more than one is a multi-file
            # commit)
            if len(changes) != 1:
                continue

            change = changes[0]
            yield ((change.new.path, entry.commit),
                   self.repo.get_object(change.new.sha).as_pretty_string())
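
A minimal usage sketch for ManifestWalker; the directory is a placeholder and the seed SHA must be an existing commit in that repository:

walker = ManifestWalker('/path/to/manifest-repo',
                        ['1111111111111111111111111111111111111111'])  # placeholder seed SHA
for (path, commit), contents in walker.walk():
    print(path, commit.id)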
Example 10
class Repository(BaseRepository):
    def __init__(self, *args, **kwargs):
        super(Repository, self).__init__(*args, **kwargs)

        self._repo = Repo(self.path)

    def _get_commit(self, commit_id):
        try:
            #return self._repo.commit(commit_id)
            return self._repo.get_object(commit_id)
        except Exception, e:
            raise CommitDoesNotExist("%s is not a commit" % commit_id)
Example 12
class GitRepository(Repository):
    CommitBuilder = GitCommitBuilder

    def __init__(self, path=None, workdir=None, create=False, bare=False):
        assert path or workdir
        if workdir:
            self.path = workdir.path
        else:
            self.path = path
        if create:
            # XXX: fragile
            path.ensure(dir=1)
            self.repo = Repo.init(path.strpath)
        else:
            assert self.path.check(dir=True)
            try:
                self.repo = Repo(self.path.strpath)
            except NotGitRepository:
                raise NotFoundError('git', self.path)

    def __len__(self):
        # XXX: fragile
        head = self.get_default_head()
        if head is None:
            return 0
        return len(self.repo.revision_history(head.id))

    def push(self):
        # XXX: hell, figure if the remote is empty, push master in that case
        # XXX: use dulwich?
        subprocess.check_call(['git', 'push', '--all'], cwd=self.path)

    def get_default_head(self):
        revs = self.repo.get_refs()
        head = revs.get('HEAD', revs.get('master'))
        if head is not None:
            return GitRevision(self, self.repo.get_object(head))

    def __getitem__(self, id):
        return GitRevision(self, self.repo.get_object(id))
Example 13
def seed_timestamps(tsdb):
    """ Populate teh timestamp database """
    r = Repo('.')
    tsdb['last_commit'] = r.get_object(r.refs['HEAD']).commit_time
    tsdb['last_run'] = int(time.time())
    for company in COMPANIES:
        tsdb.setdefault(company, dict())
        for f in os.listdir(company):
            if f.endswith('.rst'):
                filepath = os.path.join(company, f)
                tsdb[company].setdefault(f, dict(hash='', updated=0, finished=False))
                update_timestamp(tsdb[company][f], filepath)
                tsdb[company][f]['updated'] = last_updated(filepath)
                tsdb[company][f]['hash'] = hashlib.sha256(open(filepath, 'rb').read()).hexdigest()
Example 14
 def demo():
     repo = Repo(r"C:\Users\ThinkPad\Desktop\Python\rails")
     # print(repo)
     # print(repo.get_description())
     commit = repo.get_object(
         "fba1064153d8e2f4654df7762a7d3664b93e9fc8".encode("ascii"))
     print(commit.tree)
     # print(type(commit))
     # print(commit.author)
     # print(commit.committer)
     # print(commit.message)
     # print(commit.tree)
     tree = dulwichDemo.getObject(repo, commit.tree)
     # for item in tree.items():
     #     print(item)
     #     obj = (dulwichDemo.getObject(repo, item.sha))
     #     print(obj)
     #     if isinstance(obj, dulwich.objects.Blob):
     #         print(obj.splitlines().__len__())
     #         for line in obj.splitlines():
     #             print(line)
      commit2 = repo.get_object(
          "8cdef19142792101b24e1124daa434b8171bf0f2".encode("ascii"))
     # print(commit2)
     # print(commit2.tree)
     diff = dulwich.diff_tree.tree_changes(repo.object_store,
                                           commit2.tree,
                                           commit.tree,
                                           include_trees=True,
                                           change_type_same=False)
     # print(diff)
     for d in diff:
         print(d.old)
         print(dulwichDemo.getObject(repo, d.old.sha))
         print(dulwichDemo.getObject(repo, d.new.sha))
         with open("diff{0}.txt".format(d.old.sha), "wb+") as f:
             dulwich.patch.write_blob_diff(f, d.old, d.new)
             print("-" * 50)
Example 15
    def index(self, **keywords):
        if "branch" in keywords: branch = keywords["branch"]
        else: branch = "master"

        content = ""
        #display a list of files
        repo = Repo(self.package.path())
        tree = repo.tree(repo.get_object(repo.ref("refs/heads/" + branch)).tree)
        for entry in tree.entries():
            filename = entry[1]
            file_sha = entry[2]
            content = content + "<a href=\"/package/" + self.package.name + ":" + branch + "/" + filename + "/" + file_sha + "\">" + filename + "</a><br />"
        self.content = content
        self.branch = branch
        return self.respond()
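
The tree listing done by the handler above can also be sketched with current dulwich calls directly; the repository path and branch are placeholders:

from dulwich.repo import Repo

repo = Repo('/path/to/package')
commit = repo[repo.refs[b'refs/heads/master']]
tree = repo[commit.tree]
for entry in tree.items():  # TreeEntry(path, mode, sha)
    print(entry.path, entry.sha)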
Example 16
def add_doc(namespace_path, project_path):
    user_repo = os.path.join(namespace_path, project_path)
    repo_path = build_repo_path(user_repo)

    if not os.path.exists(repo_path):
        err_msg = "Repo path {0} not exist".format(repo_path)
        print >> sys.stderr, err_msg
        return

    ix = open_dir(INDEXDIR)
    writer = ix.writer()

    try:
        repo = Repo(repo_path)
    except NotGitRepository:
        err_msg = "No git repository was found at {0}".format(repo_path)
        print >> sys.stderr, err_msg
        return

    try:
        refs = repo.get_refs()

        for ref in refs.keys():
            if ref.startswith('refs/heads') or ref.startswith('refs/tags'):
                obj = repo.get_object(refs[ref])

                if isinstance(obj, Tag):
                    commit = repo[obj.object[1]]
                else:
                    commit = obj

                tree = repo[commit.tree]

                for path, entry in get_entry(repo, tree):
                    filename = os.path.join(ref.rsplit('/')[2],
                                            path, entry.path)
                    blob = repo[entry.sha]

                    writer.add_document(
                        repo=user_repo.decode('UTF-8', 'ignore'),
                        ref=ref.decode('UTF-8', 'ignore'),
                        filename=filename.decode('UTF-8', 'ignore'),
                        content=blob.data.decode('UTF-8', 'ignore'))

        writer.commit()
    except:
        writer.cancel()
        raise
Example 17
    def _git_diff(self, sha_1, sha_2):
        """Produce the diff between sha1 & sha2

        :param sha_1: commit sha of "before" state
        :param sha_2: commit sha of "after" state
        """
        _repo = Repo(self.config['top_dir'])

        c_old = _repo.get_object(sha_1)
        c_new = _repo.get_object(sha_2)

        # default writes to stdout
        try:
            porcelain.diff_tree(_repo, c_old.tree, c_new.tree)
        except:
            raise GitMethodsError(message=exit_codes[6], exit_code=6)
Example 18
def seed_timestamps(tsdb):
    """ Populate teh timestamp database """
    r = Repo('.')
    tsdb['last_commit'] = r.get_object(r.refs['HEAD']).commit_time
    tsdb['last_run'] = int(time.time())
    for company in COMPANIES:
        tsdb.setdefault(company, dict())
        for f in os.listdir(company):
            if f.endswith('.rst'):
                filepath = os.path.join(company, f)
                tsdb[company].setdefault(
                    f, dict(hash='', updated=0, finished=False))
                update_timestamp(tsdb[company][f], filepath)
                tsdb[company][f]['updated'] = last_updated(filepath)
                tsdb[company][f]['hash'] = hashlib.sha256(
                    open(filepath, 'rb').read()).hexdigest()
Example 20
class GitVCS(object):
    def __init__(self, path):
        self.repo = Repo(path)

    def branches(self):
        return dict(((k.replace('refs/heads/', ''), v) for k, v in self.repo.get_refs().items()
                     if k.startswith('refs/heads/')))

    def get_branch_ref(self, branch):
        branches = self.branches()
        if not branch in branches:
            return None
        return branches[branch]

    def get_object(self, sha1):
        try:
            return self.repo.get_object(sha1)
        except AssertionError:
            return None

    def get_object_by_path(self, ref, path):
        c = self.get_object(self.get_branch_ref(ref) or ref)
        if not c:
            return None
        paths = path.split(os.sep)
        count = len(paths)
        obj = self.get_object(c.tree)
        for i, x in enumerate(paths):
            if not x:
                break
            try:
                _mode, sha1 = obj[x]
            except KeyError:
                obj = None
                break
            obj = self.get_object(sha1)
            if i < count - 1 and not isinstance(obj, Tree):
                obj = None
                break

        if not obj:
            raise ValueError("Bad path")

        print "Result: ", type(obj), obj
        return obj
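
A minimal usage sketch for GitVCS; the repository path, branch and file path are placeholders:

vcs = GitVCS('/path/to/repo')
print(vcs.branches())
readme = vcs.get_object_by_path('master', 'README')
print(readme.data)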
Example 21
    def log(self, *virtual_path, **keywords):
        branch = "master"
        if "branch" in keywords: branch = keywords["branch"]
        sha = None
        if "sha" in keywords: sha = keywords["sha"]
        path_to_lookup = self.filename
        content = ""

        repo = Repo(self.package.path())
        
        #get a list of all commits
        all_commits = get_all_commits(repo, branch, commits_to_avoid=set([]), start=None)

        relevant_commits = list()
        last_blob_id = None
        for commit in all_commits:
            #get the actual commit
            commit = repo.get_object(commit)

            #get the tree
            tree_id = commit.tree
            #tree = repo.get_object(tree)
            
            #check the blob
            #note it may not be in the tree
            try:
                blob_id = dulwich.object_store.tree_lookup_path(repo.get_object, tree_id, path_to_lookup)[1]

                if not (blob_id == last_blob_id):
                    last_blob_id = blob_id
                    relevant_commits.append((commit, blob_id))
            except KeyError: pass #the file wasn't in the tree at that time
        
        #now you have a list of relevant commits
        #make up some output
        for commit in relevant_commits:
            actual_commit = commit[0]
            content = content + "<hr><br /><br />view this version: <a href=\"/package/" + self.package.name + ":" + branch + "/" + path_to_lookup + "/" + commit[1] + "\">" + commit[1] + "</a><br />" + format_commit(actual_commit.as_pretty_string())
        
        self.branch = branch
        self.sha = sha
        self.content = content
        return self.respond()
Example 22
class GitHandler(object):

    def __init__(self, dest_repo, ui):
        self.repo = dest_repo
        self.ui = ui
        self.mapfile = 'git-mapfile'
        self.configfile = 'git-config'

        if ui.config('git', 'intree'):
            self.gitdir = self.repo.wjoin('.git')
        else:
            self.gitdir = self.repo.join('git')

        self.importbranch = ui.config('git', 'importbranch')
        self.exportbranch = ui.config('git', 'exportbranch', 'refs/heads/master')
        self.bookbranch = ui.config('git', 'bookbranch', '')

        self.init_if_missing()
        self.load_git()
        self.load_map()
        self.load_config()

    # make the git data directory
    def init_if_missing(self):
        if not os.path.exists(self.gitdir):
            os.mkdir(self.gitdir)
            Repo.init_bare(self.gitdir)

    def load_git(self):
        self.git = Repo(self.gitdir)

    ## FILE LOAD AND SAVE METHODS

    def map_set(self, gitsha, hgsha):
        self._map_git[gitsha] = hgsha
        self._map_hg[hgsha] = gitsha

    def map_hg_get(self, gitsha):
        return self._map_git.get(gitsha)

    def map_git_get(self, hgsha):
        return self._map_hg.get(hgsha)

    def load_map(self):
        self._map_git = {}
        self._map_hg = {}
        if os.path.exists(self.repo.join(self.mapfile)):
            for line in self.repo.opener(self.mapfile):
                gitsha, hgsha = line.strip().split(' ', 1)
                self._map_git[gitsha] = hgsha
                self._map_hg[hgsha] = gitsha

    def save_map(self):
        file = self.repo.opener(self.mapfile, 'w+', atomictemp=True)
        for gitsha, hgsha in sorted(self._map_git.iteritems()):
            file.write("%s %s\n" % (gitsha, hgsha))
        file.rename()

    def load_config(self):
        self._config = {}
        if os.path.exists(self.repo.join(self.configfile)):
            for line in self.repo.opener(self.configfile):
                key, value = line.strip().split(' ', 1)
                self._config[key] = value

    def save_config(self):
        file = self.repo.opener(self.configfile, 'w+', atomictemp=True)
        for key, value in self._config.iteritems():
            file.write("%s %s\n" % (key, value))
        file.rename()


    ## END FILE LOAD AND SAVE METHODS

    def import_commits(self, remote_name):
        self.import_git_objects(remote_name)
        self.save_map()

    def fetch(self, remote_name):
        self.ui.status(_("fetching from : %s\n") % remote_name)
        self.export_git_objects()
        refs = self.fetch_pack(remote_name)
        if refs:
            self.import_git_objects(remote_name, refs)
            self.import_local_tags(refs)
        self.save_map()

    def export_commits(self):
        self.export_git_objects()
        self.export_hg_tags()
        self.update_references()
        self.save_map()

    def push(self, remote_name):
        self.ui.status(_("pushing to : %s\n") % remote_name)
        self.export_commits()
        self.update_remote_references(remote_name)
        self.upload_pack(remote_name)

    def remote_add(self, remote_name, git_url):
        self._config['remote.' + remote_name + '.url'] = git_url
        self.save_config()

    def remote_remove(self, remote_name):
        key = 'remote.' + remote_name + '.url'
        if key in self._config:
            del self._config[key]
        self.save_config()

    def remote_show(self, remote_name):
        key = 'remote.' + remote_name + '.url'
        if key in self._config:
            name = self._config[key]
            self.ui.status(_("URL for %s : %s\n") % (remote_name, name, ))
        else:
            self.ui.status(_("No remote named : %s\n") % remote_name)
        return

    def remote_list(self):
        for key, value in self._config.iteritems():
            if key[0:6] == 'remote':
                self.ui.status('%s\t%s\n' % (key, value, ))

    def remote_name_to_url(self, remote_name):
        return self._config['remote.' + remote_name + '.url']

    def update_references(self):
        try:
            # We only care about bookmarks of the form 'name',
            # not 'remote/name'.
            def is_local_ref(item): return item[0].count('/') == 0
            bms = bookmarks.parse(self.repo)
            bms = dict(filter(is_local_ref, bms.items()))

            # Create a local Git branch name for each
            # Mercurial bookmark.
            for key in bms:
                hg_sha  = hex(bms[key])
                git_sha = self.map_git_get(hg_sha)
                self.git.set_ref('refs/heads/' + key, git_sha)
        except AttributeError:
            # No bookmarks extension
            pass

        c = self.map_git_get(hex(self.repo.changelog.tip()))
        self.git.set_ref(self.exportbranch, c)

    def export_hg_tags(self):
        for tag, sha in self.repo.tags().iteritems():
            if tag[-3:] == '^{}':
                continue
            if tag == 'tip':
                continue 
            self.git.set_ref('refs/tags/' + tag, self.map_git_get(hex(sha)))

    # Make sure there's a refs/remotes/remote_name/name
    #           for every refs/heads/name
    def update_remote_references(self, remote_name):
        self.git.set_remote_refs(self.local_heads(), remote_name)

    def local_heads(self):
        def is_local_head(item): return item[0].startswith('refs/heads')
        refs = self.git.get_refs()
        return dict(filter(is_local_head, refs.items()))

    def export_git_objects(self):
        self.ui.status(_("importing Hg objects into Git\n"))
        total = len(self.repo.changelog)
        if total:
          magnitude = int(math.log(total, 10)) + 1
        else:
          magnitude = 1
        for i, rev in enumerate(self.repo.changelog):
            if i%100 == 0:
                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
            
            ctx = self.repo.changectx(rev)
            state = ctx.extra().get('hg-git', None)
            if state == 'octopus':
                self.ui.debug("revision %d is a part of octopus explosion\n" % rev)
                continue
            pgit_sha, already_written = self.export_hg_commit(rev)
            if not already_written:
                self.save_map()

    # convert this commit into git objects
    # go through the manifest, convert all blobs/trees we don't have
    # write the commit object (with metadata info)
    def export_hg_commit(self, rev):
        def is_octopus_part(ctx):
            return ctx.extra().get('hg-git', None) in set(['octopus', 'octopus-done'])

        # return if we've already processed this
        node = self.repo.changelog.lookup(rev)
        phgsha = hex(node)
        pgit_sha = self.map_git_get(phgsha)
        if pgit_sha:
            return pgit_sha, True

        self.ui.status(_("converting revision %s\n") % str(rev))

        # make sure parents are converted first
        ctx = self.repo.changectx(rev)
        extra = ctx.extra()

        parents = []
        if extra.get('hg-git', None) == 'octopus-done':
            # implode octopus parents
            part = ctx
            while is_octopus_part(part):
                (p1, p2) = part.parents()
                assert not is_octopus_part(p1)
                parents.append(p1)
                part = p2
            parents.append(p2)
        else:
            parents = ctx.parents()

        for parent in parents:
            p_rev = parent.rev()
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if not p_rev == -1:
                if not git_sha:
                    self.export_hg_commit(p_rev)

        tree_sha, renames = self.write_git_tree(ctx)
        
        commit = {}
        commit['tree'] = tree_sha
        (time, timezone) = ctx.date()

        # hg authors might not have emails
        author = ctx.user()
        if not '>' in author: 
            author = author + ' <none@none>'
        commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone)
        message = ctx.description()
        commit['message'] = ctx.description() + "\n"

        if 'committer' in extra:
            # fixup timezone
            (name_timestamp, timezone) = extra['committer'].rsplit(' ', 1)
            try:
                timezone = format_timezone(-int(timezone))
                commit['committer'] = '%s %s' % (name_timestamp, timezone)
            except ValueError:
                self.ui.warn(_("Ignoring committer in extra, invalid timezone in r%s: '%s'.\n") % (rev, timezone))
        if 'encoding' in extra:
            commit['encoding'] = extra['encoding']

        # HG EXTRA INFORMATION
        add_extras = False
        extra_message = ''
        if not ctx.branch() == 'default':
            add_extras = True
            extra_message += "branch : " + ctx.branch() + "\n"

        if renames:
            add_extras = True
            for oldfile, newfile in renames:
                extra_message += "rename : " + oldfile + " => " + newfile + "\n"

        for key, value in extra.iteritems():
            if key in ['committer', 'encoding', 'branch', 'hg-git', 'git']:
                continue
            else:
                add_extras = True        
                extra_message += "extra : " + key + " : " +  urllib.quote(value) + "\n"

        if add_extras:
            commit['message'] += "\n--HG--\n" + extra_message

        commit['parents'] = []
        for parent in parents:
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if git_sha:
                commit['parents'].append(git_sha)

        commit_sha = self.git.write_commit_hash(commit) # writing new blobs to git
        self.map_set(commit_sha, phgsha)
        return commit_sha, False

    def write_git_tree(self, ctx):
        trees = {}
        man = ctx.manifest()
        renames = []
        for filenm in man.keys():
            # write blob if not in our git database
            fctx = ctx.filectx(filenm)
            rename = fctx.renamed()
            if rename:
                filerename, sha = rename
                renames.append((filerename, filenm))
            is_exec = 'x' in fctx.flags()
            is_link = 'l' in fctx.flags()
            file_id = hex(fctx.filenode())
            blob_sha = self.map_git_get(file_id)
            if not blob_sha:
                blob_sha = self.git.write_blob(fctx.data()) # writing new blobs to git
                self.map_set(blob_sha, file_id)

            parts = filenm.split('/')
            if len(parts) > 1:
                # get filename and path for leading subdir
                filepath = parts[-1:][0]
                dirpath = "/".join([v for v in parts[0:-1]]) + '/'

                # get subdir name and path for parent dir
                parpath = '/'
                nparpath = '/'
                for part in parts[0:-1]:
                    if nparpath == '/':
                        nparpath = part + '/'
                    else:
                        nparpath += part + '/'

                    treeentry = ['tree', part + '/', nparpath]

                    if parpath not in trees:
                        trees[parpath] = []
                    if treeentry not in trees[parpath]:
                        trees[parpath].append( treeentry )

                    parpath = nparpath

                # set file entry
                fileentry = ['blob', filepath, blob_sha, is_exec, is_link]
                if dirpath not in trees:
                    trees[dirpath] = []
                trees[dirpath].append(fileentry)

            else:
                fileentry = ['blob', parts[0], blob_sha, is_exec, is_link]
                if '/' not in trees:
                    trees['/'] = []
                trees['/'].append(fileentry)

        dirs = trees.keys()
        if dirs:
            # sort by tree depth, so we write the deepest trees first
            dirs.sort(lambda a, b: len(b.split('/'))-len(a.split('/')))
            dirs.remove('/')
            dirs.append('/')
        else:
            # manifest is empty => make empty root tree
            trees['/'] = []
            dirs = ['/']

        # write all the trees
        tree_sha = None
        tree_shas = {}
        for dirnm in dirs:
            tree_data = []
            for entry in trees[dirnm]:
                # replace tree path with tree SHA
                if entry[0] == 'tree':
                    sha = tree_shas[entry[2]]
                    entry[2] = sha
                tree_data.append(entry)
            tree_sha = self.git.write_tree_array(tree_data) # writing new trees to git
            tree_shas[dirnm] = tree_sha
        
        return (tree_sha, renames) # should be the last root tree sha

    def remote_head(self, remote_name):
        for head, sha in self.git.remote_refs(remote_name).iteritems():
            if head == 'HEAD':
                return self.map_hg_get(sha)
        return None

    def upload_pack(self, remote_name):
        git_url = self.remote_name_to_url(remote_name)
        client, path = self.get_transport_and_path(git_url)
        changed = self.get_changed_refs
        genpack = self.generate_pack_contents
        try:
            self.ui.status(_("creating and sending data\n"))
            changed_refs = client.send_pack(path, changed, genpack)
            if changed_refs:
                new_refs = {}
                for ref, sha in changed_refs.iteritems():
                    self.ui.status("    "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n")
                    new_refs[ref] = sha
                self.git.set_remote_refs(new_refs, remote_name)
                self.update_hg_bookmarks(remote_name)
        except:
            # TODO : remove try/except or do something useful here
            raise

    # TODO : for now, we'll just push all heads that match remote heads
    #        * we should have specified push, tracking branches and --all
    # takes a dict of refs:shas from the server and returns what should be
    # pushed up
    def get_changed_refs(self, refs):
        keys = refs.keys()

        changed = {}
        if not keys:
            return None

        # TODO : this is a huge hack
        if keys[0] == 'capabilities^{}': # nothing on the server yet - first push
            changed['refs/heads/master'] = self.git.ref('master')

        tags = self.git.get_tags()
        for tag, sha in tags.iteritems():
            tag_name = 'refs/tags/' + tag
            if tag_name not in refs:
                changed[tag_name] = sha

        for ref_name in keys:
            parts = ref_name.split('/')
            if parts[0] == 'refs': # strip off 'refs/heads'
                if parts[1] == 'heads':
                    head = "/".join([v for v in parts[2:]])
                    local_ref = self.git.ref(ref_name)
                    if local_ref:
                        if not local_ref == refs[ref_name]:
                            changed[ref_name] = local_ref
        
        # Also push any local branches not on the server yet
        for head in self.local_heads():
            if not head in refs:
                ref = self.git.ref(head)
                changed[head] = ref

        return changed

    # takes a list of shas the server wants and shas the server has
    # and generates a list of commit shas we need to push up
    def generate_pack_contents(self, want, have):
        graph_walker = SimpleFetchGraphWalker(want, self.git.get_parents)
        next = graph_walker.next()
        shas = set()
        while next:
            if next in have:
                graph_walker.ack(next)
            else:
                shas.add(next)
            next = graph_walker.next()
        
        seen = []
        
        # so now i have the shas, need to turn them into a list of
        # tuples (sha, path) for ALL the objects i'm sending
        # TODO : don't send blobs or trees they already have
        def get_objects(tree, path):
            changes = list()
            changes.append((tree, path))
            for (mode, name, sha) in tree.entries():
                if mode == 0160000: # TODO : properly handle submodules and document what 57344 means
                    continue
                if sha in seen:
                    continue
                    
                obj = self.git.get_object(sha)
                seen.append(sha)
                if isinstance (obj, Blob):
                    changes.append((obj, path + name))
                elif isinstance(obj, Tree):
                    changes.extend(get_objects(obj, path + name + '/'))
            return changes

        objects = []
        for commit_sha in shas:
            commit = self.git.commit(commit_sha)
            objects.append((commit, 'commit'))
            tree = self.git.get_object(commit.tree)
            objects.extend( get_objects(tree, '/') )

        return objects

    def fetch_pack(self, remote_name):
        git_url = self.remote_name_to_url(remote_name)
        client, path = self.get_transport_and_path(git_url)
        graphwalker = SimpleFetchGraphWalker(self.git.heads().values(), self.git.get_parents)
        f, commit = self.git.object_store.add_pack()
        try:
            determine_wants = self.git.object_store.determine_wants_all
            refs = client.fetch_pack(path, determine_wants, graphwalker, f.write, sys.stdout.write)
            f.close()
            commit()
            if refs:
                self.git.set_remote_refs(refs, remote_name)
            else:
                self.ui.status(_("nothing new on the server\n"))
            return refs
        except:
            f.close()
            raise

    # take refs just fetched, add local tags for all tags not in .hgtags
    def import_local_tags(self, refs):
        keys = refs.keys()
        if not keys:
            return None
        for k in keys[0:]:
            ref_name = k
            parts = k.split('/')
            if (parts[0] == 'refs' and parts[1] == 'tags'):
                ref_name = "/".join([v for v in parts[2:]])
                if ref_name[-3:] == '^{}':
                    ref_name = ref_name[:-3]
                if not ref_name in self.repo.tags():
                    obj = self.git.get_object(refs[k])
                    sha = None
                    if isinstance (obj, Commit): # lightweight
                        sha = self.map_hg_get(refs[k])
                    if isinstance (obj, Tag): # annotated
                        (obj_type, obj_sha) = obj.get_object()
                        obj = self.git.get_object(obj_sha)
                        if isinstance (obj, Commit):                
                            sha = self.map_hg_get(obj_sha)
                    if sha:
                        self.repo.tag(ref_name, hex_to_sha(sha), '', True, None, None)
                    
        
    def import_git_objects(self, remote_name=None, refs=None):
        self.ui.status(_("importing Git objects into Hg\n"))
        # import heads and fetched tags as remote references
        todo = []
        done = set()
        convert_list = {}
        self.renames = {}

        # get a list of all the head shas
        if refs: 
          for head, sha in refs.iteritems():
            todo.append(sha)
        else:
          if remote_name:
              todo = self.git.remote_refs(remote_name).values()[:]
          elif self.importbranch:
              branches = self.importbranch.split(',')
              todo = [self.git.ref(i.strip()) for i in branches]
          else:
              todo = self.git.heads().values()[:]

        # traverse the heads getting a list of all the unique commits
        while todo:
            sha = todo.pop()
            assert isinstance(sha, str)
            if sha in done:
                continue
            done.add(sha)
            obj = self.git.get_object(sha)
            if isinstance (obj, Commit):                
                convert_list[sha] = obj
                todo.extend([p for p in obj.parents if p not in done])
            if isinstance(obj, Tag):
                (obj_type, obj_sha) = obj.get_object()
                obj = self.git.get_object(obj_sha)
                if isinstance (obj, Commit):                
                    convert_list[sha] = obj
                    todo.extend([p for p in obj.parents if p not in done])

        # sort the commits
        commits = toposort.TopoSort(convert_list).items()
        
        # import each of the commits, oldest first
        total = len(commits)
        magnitude = int(math.log(total, 10)) + 1 if total else 1
        for i, csha in enumerate(commits):
            if i%100 == 0:
                self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total))
            commit = convert_list[csha]
            if not self.map_hg_get(csha): # it's already here
                self.import_git_commit(commit)
            else:
                # we need to get rename info for further upstream
                self.pseudo_import_git_commit(commit)

        self.update_hg_bookmarks(remote_name)

    def update_hg_bookmarks(self, remote_name):
        try:
            bms = bookmarks.parse(self.repo)
            if remote_name:
                heads = self.git.remote_refs(remote_name)
            else:
                branches = self.bookbranch.split(',')
                heads = dict((i, self.git.ref(i.strip())) for i in branches)

            base_name = (remote_name + '/') if remote_name else '' 

            for head, sha in heads.iteritems():
                if not sha:
                    self.ui.warn(_("Could not resolve head %s.\n") % head)
                    continue
                hgsha = hex_to_sha(self.map_hg_get(sha))
                if not head == 'HEAD':
                    bms[base_name + head] = hgsha
            if heads:
                bookmarks.write(self.repo, bms)

        except AttributeError:
            self.ui.warn(_('creating bookmarks failed, do you have'
                         ' bookmarks enabled?\n'))

    def convert_git_int_mode(self, mode):
        # TODO : make these into constants
        convert = {
         0100644: '',
         0100755: 'x',
         0120000: 'l'}
        if mode in convert:
            return convert[mode]
        return ''

    def extract_hg_metadata(self, message):
        split = message.split("\n\n--HG--\n", 1)
        renames = {}
        extra = {}
        files = []
        branch = False
        if len(split) == 2:
            message, meta = split
            lines = meta.split("\n")
            for line in lines:
                if line == '':
                    continue 
                
                command, data = line.split(" : ", 1)
                
                if command == 'rename':
                    before, after = data.split(" => ", 1)
                    renames[after] = before
                if command == 'branch':
                    branch = data
                if command == 'files':
                    files.append(data)
                if command == 'extra':
                    before, after = data.split(" : ", 1)
                    extra[before] = urllib.unquote(after)
        return (message, renames, branch, files, extra)

    def pseudo_import_git_commit(self, commit):
        (strip_message, hg_renames, hg_branch, files, extra) = self.extract_hg_metadata(commit.message)
        cs = self.map_hg_get(commit.id)
        p1 = nullid
        p2 = nullid
        if len(commit.parents) > 0:
            sha = commit.parents[0]
            p1 = self.map_hg_get(sha)
        if len(commit.parents) > 1:
            sha = commit.parents[1]
            p2 = self.map_hg_get(sha)
        if len(commit.parents) > 2:
            # TODO : map extra parents to the extras file
            pass
        # saving rename info
        if (not (p2 == nullid) or (p1 == nullid)):
            self.renames[cs] = {}
        else:
            self.renames[cs] = self.renames[p1].copy()

        self.renames[cs].update(hg_renames)
    
    def import_git_commit(self, commit):
        self.ui.debug(_("importing: %s\n") % commit.id)
        # TODO : Do something less coarse-grained than try/except on the
        #        get_file call for removed files

        (strip_message, hg_renames, hg_branch, files, extra) = self.extract_hg_metadata(commit.message)
        
        # get a list of the changed, added, removed files
        files = self.git.get_files_changed(commit)

        date = (commit.author_time, -commit.author_timezone)
        text = strip_message

        def getfilectx(repo, memctx, f):
            try:
                (mode, sha, data) = self.git.get_file(commit, f)
                e = self.convert_git_int_mode(mode)
            except TypeError:
                raise IOError()
            if f in hg_renames:
                copied_path = hg_renames[f]
            else:
                copied_path = None
            return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

        gparents = map(self.map_hg_get, commit.parents)
        p1, p2 = (nullid, nullid)
        octopus = False

        if len(gparents) > 1:
            # merge, possibly octopus
            def commit_octopus(p1, p2):
                ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
                                     commit.author, date, {'hg-git': 'octopus'})
                return hex(self.repo.commitctx(ctx))

            octopus = len(gparents) > 2
            p2 = gparents.pop()
            p1 = gparents.pop()
            while len(gparents) > 0:
                p2 = commit_octopus(p1, p2)
                p1 = gparents.pop()
        else:
            if gparents:
                p1 = gparents.pop()

        # weird hack for explicit file renames in first but not second branch
        if not (p2 == nullid):
            vals = [item for item in self.renames[p1].values() if not item in self.renames[p2].values()]
            for removefile in vals:
                files.remove(removefile)
        author = commit.author

        extra = {}
        if ' <none@none>' in commit.author:
            author = commit.author[:-12]

        # if named branch, add to extra
        if hg_branch:
            extra['branch'] = hg_branch

        # if committer is different than author, add it to extra
        if not commit._author_raw == commit._committer_raw:
            extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone)

        if commit._encoding:
            extra['encoding'] = commit._encoding

        if hg_branch:
            extra['branch'] = hg_branch

        if octopus:
            extra['hg-git'] = 'octopus-done'

        ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
                             author, date, extra)
        node = self.repo.commitctx(ctx)

        # save changeset to mapping file
        cs = hex(node)
        self.map_set(commit.id, cs)
        
        # saving rename info
        if (not (p2 == nullid) or (p1 == nullid)):
            self.renames[cs] = {}
        else:
            self.renames[cs] = self.renames[p1].copy()
            
        self.renames[cs].update(hg_renames)
        

    def check_bookmarks(self):
        if self.ui.config('extensions', 'hgext.bookmarks') is not None:
            self.ui.warn("YOU NEED TO SETUP BOOKMARKS\n")

    def get_transport_and_path(self, uri):
        from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
        for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)):
            if uri.startswith(handler):
                if handler == 'git@':
                    host, path = uri[len(handler):].split(":", 1)
                    host = 'git@' + host
                else:
                    host, path = uri[len(handler):].split("/", 1)
                return transport(host), '/' + path
        # if its not git or git+ssh, try a local url..
        return SubprocessGitClient(), uri

    def clear(self):
        mapfile = self.repo.join(self.mapfile)
        if os.path.exists(self.gitdir):
            for root, dirs, files in os.walk(self.gitdir, topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))
            os.rmdir(self.gitdir)
        if os.path.exists(mapfile):
            os.remove(mapfile)
Example 23
#!/usr/bin/env python2

import os.path
import urlparse
from email.utils import formatdate
from dulwich.repo import Repo
from dulwich.objects import Blob, Tree, Commit
from docutils import io, nodes
from docutils.core import publish_doctree, publish_from_doctree
from render import MyWriter

repo = Repo(".")
commit_sha = repo.head()
commit = repo.get_object(commit_sha)
index = repo.open_index()
assert not list(index.changes_from_tree(repo.object_store, commit.tree)), "uncommitted changes"

store = repo.object_store


def render_rst(blob, path):
    doc = publish_doctree(blob.as_raw_string())
    for node in doc.traverse(nodes.reference):
        uri = urlparse.urlparse(node['refuri'])
        if not uri.netloc and os.path.basename(uri.path) == "README.rst":
            node['refuri'] = urlparse.urlunparse(
                (uri.scheme, uri.netloc, uri.path[:-10] or "./", uri.params, uri.query, uri.fragment))

    output = publish_from_doctree(
        doc,
        destination_path=path,
Example 24
class GitHandler(object):
    mapfile = 'git-mapfile'
    tagsfile = 'git-tags'

    def __init__(self, dest_repo, ui):
        self.repo = dest_repo
        self.ui = ui

        if ui.configbool('git', 'intree'):
            self.gitdir = self.repo.wjoin('.git')
        else:
            self.gitdir = self.repo.join('git')

        self.paths = ui.configitems('paths')

        self.load_map()
        self.load_tags()

    # make the git data directory
    def init_if_missing(self):
        if os.path.exists(self.gitdir):
            self.git = Repo(self.gitdir)
        else:
            os.mkdir(self.gitdir)
            self.git = Repo.init_bare(self.gitdir)

    ## FILE LOAD AND SAVE METHODS

    def map_set(self, gitsha, hgsha):
        self._map_git[gitsha] = hgsha
        self._map_hg[hgsha] = gitsha

    def map_hg_get(self, gitsha):
        return self._map_git.get(gitsha)

    def map_git_get(self, hgsha):
        return self._map_hg.get(hgsha)

    def load_map(self):
        self._map_git = {}
        self._map_hg = {}
        if os.path.exists(self.repo.join(self.mapfile)):
            for line in self.repo.opener(self.mapfile):
                gitsha, hgsha = line.strip().split(' ', 1)
                self._map_git[gitsha] = hgsha
                self._map_hg[hgsha] = gitsha

    def save_map(self):
        file = self.repo.opener(self.mapfile, 'w+', atomictemp=True)
        for hgsha, gitsha in sorted(self._map_hg.iteritems()):
            file.write("%s %s\n" % (gitsha, hgsha))
        file.rename()


    def load_tags(self):
        self.tags = {}
        if os.path.exists(self.repo.join(self.tagsfile)):
            for line in self.repo.opener(self.tagsfile):
                sha, name = line.strip().split(' ', 1)
                self.tags[name] = sha

    def save_tags(self):
        file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True)
        for name, sha in sorted(self.tags.iteritems()):
            if not self.repo.tagtype(name) == 'global':
                file.write("%s %s\n" % (sha, name))
        file.rename()

    ## END FILE LOAD AND SAVE METHODS

    ## COMMANDS METHODS

    def import_commits(self, remote_name):
        self.import_git_objects(remote_name)
        self.save_map()

    def fetch(self, remote, heads):
        self.export_commits()
        refs = self.fetch_pack(remote, heads)
        remote_name = self.remote_name(remote)

        if refs:
            self.import_git_objects(remote_name, refs)
            self.import_tags(refs)
            self.update_hg_bookmarks(refs)
            if remote_name:
                self.update_remote_branches(remote_name, refs)
            elif not self.paths:
                # initial cloning
                self.update_remote_branches('default', refs)
        else:
            self.ui.status(_("nothing new on the server\n"))

        self.save_map()

    def export_commits(self):
        try:
            self.export_git_objects()
            self.export_hg_tags()
            self.update_references()
        finally:
            self.save_map()

    def get_refs(self, remote):
        self.export_commits()
        client, path = self.get_transport_and_path(remote)
        old_refs = {}
        new_refs = {}
        def changed(refs):
            old_refs.update(refs)
            to_push = set(self.local_heads().values() + self.tags.values())
            new_refs.update(self.get_changed_refs(refs, to_push, True))
            # don't push anything
            return {}

        try:
            client.send_pack(path, changed, None)

            changed_refs = [ref for ref, sha in new_refs.iteritems()
                            if sha != old_refs.get(ref)]
            new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs]
            old = dict( (bin(self.map_hg_get(old_refs[r])), 1)
                       for r in changed_refs if r in old_refs)

            return old, new
        except HangupException:
            raise hgutil.Abort("the remote end hung up unexpectedly")

    def push(self, remote, revs, force):
        self.export_commits()
        changed_refs = self.upload_pack(remote, revs, force)
        remote_name = self.remote_name(remote)

        if remote_name and changed_refs:
            for ref, sha in changed_refs.iteritems():
                self.ui.status("    %s::%s => GIT:%s\n" %
                               (remote_name, ref, sha[0:8]))

            self.update_remote_branches(remote_name, changed_refs)

    def clear(self):
        mapfile = self.repo.join(self.mapfile)
        if os.path.exists(self.gitdir):
            for root, dirs, files in os.walk(self.gitdir, topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))
            os.rmdir(self.gitdir)
        if os.path.exists(mapfile):
            os.remove(mapfile)

    ## CHANGESET CONVERSION METHODS

    def export_git_objects(self):
        self.ui.status(_("importing Hg objects into Git\n"))
        self.init_if_missing()

        nodes = [self.repo.lookup(n) for n in self.repo]
        export = [node for node in nodes if not hex(node) in self._map_hg]
        total = len(export)
        for i, rev in enumerate(export):
            util.progress(self.ui, 'import', i, total=total)
            ctx = self.repo.changectx(rev)
            state = ctx.extra().get('hg-git', None)
            if state == 'octopus':
                self.ui.debug("revision %d is a part "
                              "of octopus explosion\n" % ctx.rev())
                continue
            self.export_hg_commit(rev)
        util.progress(self.ui, 'import', None, total=total)


    # convert this commit into git objects
    # go through the manifest, convert all blobs/trees we don't have
    # write the commit object (with metadata info)
    def export_hg_commit(self, rev):
        self.ui.note(_("converting revision %s\n") % hex(rev))

        oldenc = self.swap_out_encoding()

        ctx = self.repo.changectx(rev)
        extra = ctx.extra()

        commit = Commit()

        (time, timezone) = ctx.date()
        commit.author = self.get_git_author(ctx)
        commit.author_time = int(time)
        commit.author_timezone = -timezone

        if 'committer' in extra:
            # fixup timezone
            (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2)
            commit.committer = name
            commit.commit_time = timestamp

            # work around a timezone format change
            if int(timezone) % 60 != 0: #pragma: no cover
                timezone = parse_timezone(timezone)
            else:
                timezone = -int(timezone)
            commit.commit_timezone = timezone
        else:
            commit.committer = commit.author
            commit.commit_time = commit.author_time
            commit.commit_timezone = commit.author_timezone

        commit.parents = []
        for parent in self.get_git_parents(ctx):
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if git_sha:
                commit.parents.append(git_sha)

        commit.message = self.get_git_message(ctx)

        if 'encoding' in extra:
            commit.encoding = extra['encoding']

        tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx))
        commit.tree = tree_sha

        self.git.object_store.add_object(commit)
        self.map_set(commit.id, ctx.hex())

        self.swap_out_encoding(oldenc)
        return commit.id

    def get_git_author(self, ctx):
        # hg authors might not have emails
        author = ctx.user()

        # check for git author pattern compliance
        regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
        a = regex.match(author)

        if a:
            name = a.group(1)
            email = a.group(2)
            if len(a.group(3)) > 0:
                name += ' ext:(' + urllib.quote(a.group(3)) + ')'
            author = name + ' <' + email + '>'
        else:
            author = author + ' <none@none>'

        if 'author' in ctx.extra():
            author = "".join(apply_delta(author, ctx.extra()['author']))

        return author

    def get_git_parents(self, ctx):
        def is_octopus_part(ctx):
            return ctx.extra().get('hg-git', None) in ('octopus', 'octopus-done')

        parents = []
        if ctx.extra().get('hg-git', None) == 'octopus-done':
            # implode octopus parents
            part = ctx
            while is_octopus_part(part):
                (p1, p2) = part.parents()
                assert not is_octopus_part(p1)
                parents.append(p1)
                part = p2
            parents.append(p2)
        else:
            parents = ctx.parents()

        return parents

    def get_git_message(self, ctx):
        extra = ctx.extra()

        message = ctx.description() + "\n"
        if 'message' in extra:
            message = "".join(apply_delta(message, extra['message']))

        # HG EXTRA INFORMATION
        add_extras = False
        extra_message = ''
        if not ctx.branch() == 'default':
            add_extras = True
            extra_message += "branch : " + ctx.branch() + "\n"

        renames = []
        for f in ctx.files():
            if f not in ctx.manifest():
                continue
            rename = ctx.filectx(f).renamed()
            if rename:
                renames.append((rename[0], f))

        if renames:
            add_extras = True
            for oldfile, newfile in renames:
                extra_message += "rename : " + oldfile + " => " + newfile + "\n"

        for key, value in extra.iteritems():
            if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'):
                continue
            else:
                add_extras = True
                extra_message += "extra : " + key + " : " +  urllib.quote(value) + "\n"

        if add_extras:
            message += "\n--HG--\n" + extra_message

        return message

    def iterblobs(self, ctx):
        for f in ctx:
            fctx = ctx[f]
            blobid = self.map_git_get(hex(fctx.filenode()))

            if not blobid:
                blob = Blob.from_string(fctx.data())
                self.git.object_store.add_object(blob)
                self.map_set(blob.id, hex(fctx.filenode()))
                blobid = blob.id

            if 'l' in ctx.flags(f):
                mode = 0120000
            elif 'x' in ctx.flags(f):
                mode = 0100755
            else:
                mode = 0100644

            yield f, blobid, mode

    def import_git_objects(self, remote_name=None, refs=None):
        self.ui.status(_("importing Git objects into Hg\n"))
        self.init_if_missing()

        # import heads and fetched tags as remote references
        todo = []
        done = set()
        convert_list = {}

        # get a list of all the head shas
        seenheads = set()
        if refs is None:
            refs = self.git.refs.as_dict()
        if refs:
            for sha in refs.itervalues():
                # refs contains all the refs in the server, not just the ones
                # we are pulling
                if sha in self.git.object_store:
                    obj = self.git.get_object(sha)
                    while isinstance(obj, Tag):
                        obj_type, sha = obj.object
                        obj = self.git.get_object(sha)
                    if isinstance (obj, Commit) and sha not in seenheads:
                        seenheads.add(sha)
                        todo.append(sha)

        # sort by commit date
        def commitdate(sha):
            obj = self.git.get_object(sha)
            return obj.commit_time-obj.commit_timezone

        todo.sort(key=commitdate, reverse=True)

        # traverse the heads getting a list of all the unique commits
        commits = []
        seen = set(todo)
        while todo:
            sha = todo[-1]
            if sha in done:
                todo.pop()
                continue
            assert isinstance(sha, str)
            obj = self.git.get_object(sha)
            assert isinstance(obj, Commit)
            for p in obj.parents:
                if p not in done:
                    todo.append(p)
                    break
            else:
                commits.append(sha)
                convert_list[sha] = obj
                done.add(sha)
                todo.pop()

        commits = [commit for commit in commits if not commit in self._map_git]
        # import each of the commits, oldest first
        total = len(commits)
        for i, csha in enumerate(commits):
            util.progress(self.ui, 'import', i, total=total, unit='commits')
            commit = convert_list[csha]
            self.import_git_commit(commit)
        util.progress(self.ui, 'import', None, total=total, unit='commits')

    def import_git_commit(self, commit):
        self.ui.debug(_("importing: %s\n") % commit.id)

        (strip_message, hg_renames,
         hg_branch, extra) = self.extract_hg_metadata(commit.message)

        # get a list of the changed, added, removed files
        files = self.get_files_changed(commit)

        date = (commit.author_time, -commit.author_timezone)
        text = strip_message

        origtext = text
        try:
            text.decode('utf-8')
        except UnicodeDecodeError:
            text = self.decode_guess(text, commit.encoding)

        text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
        if text + '\n' != origtext:
            extra['message'] = create_delta(text + '\n', origtext)

        author = commit.author

        # convert extra data back to the end
        if ' ext:' in commit.author:
            regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
            m = regex.match(commit.author)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))
                email = m.group(3)
                author = name + ' <' + email + '>' + ex

        if ' <none@none>' in commit.author:
            author = commit.author[:-12]

        try:
            author.decode('utf-8')
        except UnicodeDecodeError:
            origauthor = author
            author = self.decode_guess(author, commit.encoding)
            extra['author'] = create_delta(author, origauthor)

        oldenc = self.swap_out_encoding()

        def getfilectx(repo, memctx, f):
            delete, mode, sha = files[f]
            if delete:
                raise IOError

            data = self.git[sha].data
            copied_path = hg_renames.get(f)
            e = self.convert_git_int_mode(mode)

            return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

        gparents = map(self.map_hg_get, commit.parents)
        p1, p2 = (nullid, nullid)
        octopus = False

        if len(gparents) > 1:
            # merge, possibly octopus
            def commit_octopus(p1, p2):
                ctx = context.memctx(self.repo, (p1, p2), text, list(files), getfilectx,
                                     author, date, {'hg-git': 'octopus'})
                return hex(self.repo.commitctx(ctx))

            octopus = len(gparents) > 2
            p2 = gparents.pop()
            p1 = gparents.pop()
            while len(gparents) > 0:
                p2 = commit_octopus(p1, p2)
                p1 = gparents.pop()
        else:
            if gparents:
                p1 = gparents.pop()

        pa = None
        if not (p2 == nullid):
            node1 = self.repo.changectx(p1)
            node2 = self.repo.changectx(p2)
            pa = node1.ancestor(node2)

        # if named branch, add to extra
        if hg_branch:
            extra['branch'] = hg_branch

        # if committer is different than author, add it to extra
        if commit.author != commit.committer \
               or commit.author_time != commit.commit_time \
               or commit.author_timezone != commit.commit_timezone:
            extra['committer'] = "%s %d %d" % (
                commit.committer, commit.commit_time, -commit.commit_timezone)

        if commit.encoding:
            extra['encoding'] = commit.encoding

        if hg_branch:
            extra['branch'] = hg_branch

        if octopus:
            extra['hg-git'] = 'octopus-done'

        # TODO use 'n in self.repo' when we require hg 1.5
        def repo_contains(n):
            try:
                return bool(self.repo.lookup(n))
            except error.RepoLookupError:
                return False

        if not (repo_contains(p1) and repo_contains(p2)):
            raise hgutil.Abort(_('you appear to have run strip - '
                                 'please run hg git-cleanup'))
        ctx = context.memctx(self.repo, (p1, p2), text, list(files), getfilectx,
                             author, date, extra)

        node = self.repo.commitctx(ctx)

        self.swap_out_encoding(oldenc)

        # save changeset to mapping file
        cs = hex(node)
        self.map_set(commit.id, cs)

    ## PACK UPLOADING AND FETCHING

    def upload_pack(self, remote, revs, force):
        client, path = self.get_transport_and_path(remote)
        def changed(refs):
            to_push = revs or set(self.local_heads().values() + self.tags.values())
            return self.get_changed_refs(refs, to_push, force)

        genpack = self.git.object_store.generate_pack_contents
        try:
            self.ui.status(_("creating and sending data\n"))
            changed_refs = client.send_pack(path, changed, genpack)
            return changed_refs
        except HangupException:
            raise hgutil.Abort("the remote end hung up unexpectedly")

    def get_changed_refs(self, refs, revs, force):
        new_refs = refs.copy()

        #The remote repo is empty and the local one doesn't have bookmarks/tags
        if refs.keys()[0] == 'capabilities^{}':
            del new_refs['capabilities^{}']
            if not self.local_heads():
                tip = hex(self.repo.lookup('tip'))
                bookmarks.bookmark(self.ui, self.repo, 'master', tip, force=True)
                bookmarks.setcurrent(self.repo, 'master')
                new_refs['refs/heads/master'] = self.map_git_get(tip)

        for rev in revs:
            ctx = self.repo[rev]
            heads = [t for t in ctx.tags() if t in self.local_heads()]
            tags = [t for t in ctx.tags() if t in self.tags]

            if not (heads or tags):
                raise hgutil.Abort("revision %s cannot be pushed since"
                                   " it doesn't have a ref" % ctx)

            # Check if the tags the server is advertising are annotated tags,
            # by attempting to retrieve them from our git repo, and building a
            # list of these tags.
            #
            # This is possible, even though (currently) annotated tags are
            # dereferenced and stored as lightweight ones, as the annotated tag
            # is still stored in the git repo.
            uptodate_annotated_tags = []
            for r in tags:
                ref = 'refs/tags/'+r
                # Check tag.
                if not ref in refs:
                    continue
                try:
                    # We're not using Repo.tag(), as it's deprecated.
                    tag = self.git.get_object(refs[ref])
                    if not isinstance(tag, Tag):
                        continue
                except KeyError:
                    continue

                # If we've reached here, the tag's good.
                uptodate_annotated_tags.append(ref)

            for r in heads + tags:
                if r in heads:
                    ref = 'refs/heads/'+r
                else:
                    ref = 'refs/tags/'+r

                if ref not in refs:
                    new_refs[ref] = self.map_git_get(ctx.hex())
                elif new_refs[ref] in self._map_git:
                    rctx = self.repo[self.map_hg_get(new_refs[ref])]
                    if rctx.ancestor(ctx) == rctx or force:
                        new_refs[ref] = self.map_git_get(ctx.hex())
                    else:
                        raise hgutil.Abort("pushing %s overwrites %s"
                                           % (ref, ctx))
                elif ref in uptodate_annotated_tags:
                    # we already have the annotated tag.
                    pass
                else:
                    raise hgutil.Abort("%s changed on the server, please pull "
                                       "and merge before pushing" % ref)

        return new_refs


    def fetch_pack(self, remote_name, heads):
        client, path = self.get_transport_and_path(remote_name)
        graphwalker = self.git.get_graph_walker()
        def determine_wants(refs):
            if heads:
                want = []
                # contains pairs of ('refs/(heads|tags|...)/foo', 'foo')
                # if ref is just '<foo>', then we get ('foo', 'foo')
                stripped_refs = [
                    (r, r[r.find('/', r.find('/')+1)+1:])
                        for r in refs]
                for h in heads:
                    r = [pair[0] for pair in stripped_refs if pair[1] == h]
                    if not r:
                        raise hgutil.Abort("ref %s not found on remote server" % h)
                    elif len(r) == 1:
                        want.append(refs[r[0]])
                    else:
                        raise hgutil.Abort("ambiguous reference %s: %r" % (h, r))
            else:
                want = [sha for ref, sha in refs.iteritems()
                        if not ref.endswith('^{}')]
            return want
        f, commit = self.git.object_store.add_pack()
        try:
            try:
                return client.fetch_pack(path, determine_wants, graphwalker,
                                         f.write, self.ui.status)
            except HangupException:
                raise hgutil.Abort("the remote end hung up unexpectedly")
        finally:
            commit()

    ## REFERENCES HANDLING

    def update_references(self):
        heads = self.local_heads()

        # Create a local Git branch name for each
        # Mercurial bookmark.
        for key in heads:
            self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key])

    def export_hg_tags(self):
        for tag, sha in self.repo.tags().iteritems():
            # git doesn't like spaces in tag names
            tag = tag.replace(" ", "_")
            if self.repo.tagtype(tag) in ('global', 'git'):
                self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha))
                self.tags[tag] = hex(sha)

    def local_heads(self):
        try:
            if getattr(bookmarks, 'parse', None):
                bms = bookmarks.parse(self.repo)
            else:
                bms = self.repo._bookmarks
            return dict([(bm, hex(bms[bm])) for bm in bms])
        except AttributeError: #pragma: no cover
            return {}

    def import_tags(self, refs):
        keys = refs.keys()
        if not keys:
            return
        for k in keys[:]:
            ref_name = k
            parts = k.split('/')
            if parts[0] == 'refs' and parts[1] == 'tags':
                ref_name = "/".join([v for v in parts[2:]])
                # refs contains all the refs in the server, not just
                # the ones we are pulling
                if refs[k] not in self.git.object_store:
                    continue
                if ref_name[-3:] == '^{}':
                    ref_name = ref_name[:-3]
                if not ref_name in self.repo.tags():
                    obj = self.git.get_object(refs[k])
                    sha = None
                    if isinstance (obj, Commit): # lightweight
                        sha = self.map_hg_get(refs[k])
                        self.tags[ref_name] = sha
                    elif isinstance (obj, Tag): # annotated
                        (obj_type, obj_sha) = obj.object
                        obj = self.git.get_object(obj_sha)
                        if isinstance (obj, Commit):
                            sha = self.map_hg_get(obj_sha)
                            # TODO: better handling for annotated tags
                            self.tags[ref_name] = sha
        self.save_tags()

    def update_hg_bookmarks(self, refs):
        try:
            oldbm = getattr(bookmarks, 'parse', None)
            if oldbm:
                bms = bookmarks.parse(self.repo)
            else:
                bms = self.repo._bookmarks
            heads = dict([(ref[11:],refs[ref]) for ref in refs
                          if ref.startswith('refs/heads/')])

            for head, sha in heads.iteritems():
                # refs contains all the refs in the server, not just
                # the ones we are pulling
                if sha not in self.git.object_store:
                    continue
                hgsha = bin(self.map_hg_get(sha))
                if not head in bms:
                    # new branch
                    bms[head] = hgsha
                else:
                    bm = self.repo[bms[head]]
                    if bm.ancestor(self.repo[hgsha]) == bm:
                        # fast forward
                        bms[head] = hgsha
            if heads:
                if oldbm:
                    bookmarks.write(self.repo, bms)
                else:
                    self.repo._bookmarks = bms
                    bookmarks.write(self.repo)

        except AttributeError:
            self.ui.warn(_('creating bookmarks failed, do you have'
                         ' bookmarks enabled?\n'))

    def update_remote_branches(self, remote_name, refs):
        def _set_hg_tag(head, sha):
            # refs contains all the refs in the server, not just the ones
            # we are pulling
            if sha not in self.git.object_store:
                return
            hgsha = bin(self.map_hg_get(sha))
            tag = '%s/%s' % (remote_name, head)
            self.repo.tag(tag, hgsha, '', True, None, None)

        for ref_name, sha in refs.iteritems():
            if ref_name.startswith('refs/heads'):
                head = ref_name[11:]
                _set_hg_tag(head, sha)

                new_ref = 'refs/remotes/%s/%s' % (remote_name, head)
                self.git.refs[new_ref] = sha
            elif (ref_name.startswith('refs/tags')
                  and not ref_name.endswith('^{}')):
                self.git.refs[ref_name] = sha


    ## UTILITY FUNCTIONS

    def convert_git_int_mode(self, mode):
        # TODO: make these into constants
        convert = {
         0100644: '',
         0100755: 'x',
         0120000: 'l'}
        if mode in convert:
            return convert[mode]
        return ''

    def extract_hg_metadata(self, message):
        split = message.split("\n--HG--\n", 1)
        renames = {}
        extra = {}
        branch = False
        if len(split) == 2:
            message, meta = split
            lines = meta.split("\n")
            for line in lines:
                if line == '':
                    continue

                command, data = line.split(" : ", 1)

                if command == 'rename':
                    before, after = data.split(" => ", 1)
                    renames[after] = before
                if command == 'branch':
                    branch = data
                if command == 'extra':
                    before, after = data.split(" : ", 1)
                    extra[before] = urllib.unquote(after)
        return (message, renames, branch, extra)

    def get_file(self, commit, f):
        otree = self.git.tree(commit.tree)
        parts = f.split('/')
        for part in parts:
            (mode, sha) = otree[part]
            obj = self.git.get_object(sha)
            if isinstance (obj, Blob):
                return (mode, sha, obj._text)
            elif isinstance(obj, Tree):
                otree = obj

    def get_files_changed(self, commit):
        tree = commit.tree
        btree = None

        if commit.parents:
            btree = self.git[commit.parents[0]].tree

        changes = self.git.object_store.tree_changes(btree, tree)
        files = {}
        for (oldfile, newfile), (oldmode, newmode), (oldsha, newsha) in changes:
            # don't create new submodules
            if newmode == 0160000:
                if oldfile:
                    # become a regular delete
                    newfile, newmode = None, None
                else:
                    continue
            # so old submodules shouldn't exist
            if oldmode == 0160000:
                if newfile:
                    # become a regular add
                    oldfile, oldmode = None, None
                else:
                    continue

            if newfile is None:
                file = oldfile
                delete = True
            else:
                file = newfile
                delete = False

            files[file] = (delete, newmode, newsha)

        return files

    def remote_name(self, remote):
        names = [name for name, path in self.paths if path == remote]
        if names:
            return names[0]

    # Stolen from hgsubversion
    def swap_out_encoding(self, new_encoding='UTF-8'):
        try:
            from mercurial import encoding
            old = encoding.encoding
            encoding.encoding = new_encoding
        except ImportError:
            old = hgutil._encoding
            hgutil._encoding = new_encoding
        return old

    def decode_guess(self, string, encoding):
        # text is not valid utf-8, try to make sense of it
        if encoding:
            try:
                return string.decode(encoding).encode('utf-8')
            except UnicodeDecodeError:
                pass

        try:
            return string.decode('latin-1').encode('utf-8')
        except UnicodeDecodeError:
            return string.decode('ascii', 'replace').encode('utf-8')

    def get_transport_and_path(self, uri):
        for handler, transport in (("git://", client.TCPGitClient),
                                   ("git@", client.SSHGitClient),
                                   ("git+ssh://", client.SSHGitClient)):
            if uri.startswith(handler):
                # We need to split around : or /, whatever comes first
                hostpath = uri[len(handler):]
                if (hostpath.find(':') > 0 and hostpath.find('/') > 0):
                    # we have both, whatever is first wins.
                    if hostpath.find(':') < hostpath.find('/'):
                        hostpath_seper = ':'
                    else:
                        hostpath_seper = '/'
                elif hostpath.find(':') > 0:
                    hostpath_seper = ':'
                else:
                    hostpath_seper = '/'

                host, path = hostpath.split(hostpath_seper, 1)
                if hostpath_seper == '/':
                    transportpath = '/' + path
                else:
                    transportpath = path
                return transport(host, thin_packs=False), transportpath
        # if it's not git or git+ssh, try a local URL
        return client.SubprocessGitClient(thin_packs=False), uri
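
Taken together, the handler above mostly glues Mercurial onto a handful of dulwich primitives: Blob, commit_tree and Commit for writing objects, and the refs container for publishing them. The following is only a minimal sketch of that write path, not part of the original example: the path and author are hypothetical, and it assumes the same old-style dulwich API (parse_timezone()[0], Python 2 octal modes) the example itself uses.

import os
import time
from dulwich.repo import Repo
from dulwich.objects import Blob, Commit, parse_timezone
from dulwich.index import commit_tree

path = '/tmp/sketch.git'                     # hypothetical location
os.mkdir(path)
repo = Repo.init_bare(path)

blob = Blob.from_string('hello\n')
repo.object_store.add_object(blob)

# commit_tree() consumes (path, sha, mode) triples, the same shape iterblobs() yields
tree_id = commit_tree(repo.object_store, [('hello.txt', blob.id, 0100644)])

commit = Commit()
commit.tree = tree_id
commit.author = commit.committer = 'Example <example@example.org>'   # hypothetical author
commit.commit_time = commit.author_time = int(time.time())
commit.commit_timezone = commit.author_timezone = parse_timezone('+0000')[0]
commit.encoding = 'UTF-8'
commit.message = 'sketch commit'
repo.object_store.add_object(commit)
repo.refs['refs/heads/master'] = commit.id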
Esempio n. 25
0
class GitStorage():
    def _ignoreFile(self, dirName, fileName):
        """
        used as the ``ignore`` callback for the shutil.copytree call
        ``dirName``
            the working directory
        ``fileName``
            list of files inside the directory (dirName)
        """
        result = []
        for i in fileName:
            path = dirName + i
            if path not in fileToIgnore:
                result.append(path)
        return result

    def _commit(self, tree):
        """
        commit a tree; used only by __init__
        ``tree``
            tree to commit
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (
            pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('committing')
        return commit.id

    def __init__(self, path):
        #creating the repository
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    #only this function will be used on upload
    def change(self, files):
        """
        used to change a file in the git storage; it can also be called for the first upload
        ``files``
            a list of files to change
        """
        if len(files) == 0:
            log.debug("trying to change nothing will do... nothing")
        else:
            log.debug("this will change %i files" % (len(files)))
            for f in files:
                self.repo.stage(str(f))
            log.debug("stages dones")
            self.repo.do_commit(
                "this is so awesome that nobody will never see it",
                committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """
        create files from the diff between two trees; used in the code browser
        to get an older version (walking the history)
        ``tree``
            the tree to compare against
        ``dest``
            the destination folder in which to build the sources
        ``originalTree``
            the original tree, by default the latest one

        by default it returns the last changed files

        """
        if tree is None:
            head = self.repo.commit(
                self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(
                self.repo.commit(self.repo.head()).tree)
        blobToBuild = []
        #getting blob that have changed
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)
        repoLocation = os.path.join(str(self.repo).split("'")[1])
        #creating the folder with link to older files
        if os.path.exists(repoLocation + dest):
            log.warning("%s already exist, copy will not work")
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation,
                            repoLocation + dest,
                            symlinks=True,
                            ignore=self._ignoreFile)
        for b in blobToBuild:
            # os.path.split() returns a tuple; we only need the directory part
            fileDirectory = os.path.dirname(b.path)
            if not os.path.exists(os.path.join(repoLocation + dest,
                                               fileDirectory)):
                os.makedirs(os.path.join(repoLocation + dest, fileDirectory))
            file = open(os.path.join(repoLocation + dest, b.path), 'w')
            file.write(self.repo.get_object(b.sha).as_raw_string())
            file.close()
        tree = None
        originalTree = None

    #get*
    def getLastTree(self):
        """
        return the last tree
        """
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """
        return trees
        """
        result = []
        commit = self.repo._commit(self.repo.head())
        for c in commit._get_parents():
            result.append(c.tree)
        return result

    def getOlderFileContent(self, file):
        """
        return the older content of the file, i.e. the first version found that differs from it
        ``file``
            the file to work on
        """
        with open(file) as f:
            originalBlob = Blob.from_string("".join(f.readlines()))
        trees = self.getAllTrees()
        tree = None
        for t in trees:
            #parse the trees in order to find the one where the file changed
            if originalBlob not in t:
                tree = t
                break
        #a matching tree must exist, otherwise the file does not come from this package
        if tree is None:
            log.error(
                "there is no tree that contains this blob; this shouldn't happen - "
                "the file does not appear to come from this package"
            )
        else:
            if self.repo._commit(self.repo.head()).tree == tree:
                olderTree = self.repo.commit(
                    self.repo.head())._get_parents()[0].tree
            else:
                for c in self.repo._commit(self.repo.head())._get_parents():
                    if c.tree == tree:
                        try:
                            olderTree = c.get_parents()[0]
                        except IndexError:
                            log.debug("file is the last version")
                            olderTree = tree
            if olderTree != tree:
                #we must check here the blob that contains the older file
                for b in self.repo.object_store.iter_tree_contents(
                        olderTree.id):
                    if originalBlob.path == b.path:
                        #older blob found! the first loop already checked that it differs,
                        # so we can now return the content of the file
                        return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """
        return a list of all commits
        """
        return self.repo.commit(self.repo.head())._get_parents()
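
GitStorage.change() above relies on dulwich's higher-level Repo.stage()/Repo.do_commit() pair rather than building objects by hand. This is only a hedged usage sketch, with a hypothetical repository path, file name and committer:

from dulwich.repo import Repo

repo = Repo('/tmp/storage')            # hypothetical, already-initialised repository
repo.stage(['uploaded_file.dsc'])      # stage() takes paths relative to the repository root
commit_id = repo.do_commit('import new upload',
                           committer='debexpo <debexpo@example.org>')
print commit_id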
Esempio n. 26
0
def git_commit_info():
    git = Repo('.')
    commit = git.get_object(git.head())
    return {'id': commit.id.decode("utf-8")[0:7], 'id_full': commit.id.decode("utf-8"),
            'author': regex.findall("(.*?) <(.*?)>", commit.author.decode("utf-8"))[0],
            'message': commit.message.decode("utf-8").strip('\r\n').split('\n')[0]}
Esempio n. 27
0
class Gits3(object):

    def __init__(self, path):
        self.path = path
        self.open_repo(path)

    def open_repo(self, path):
        self.repo = Repo(path)
  
    def get_id(self, ref):
        return self.repo.get_refs()[ref]

    def get_updates(self, local_ref, tracking_ref):
        refs = self.repo.get_refs()
        for key, value in refs.iteritems():
            print key, value

        local = refs[local_ref]
        try:
            remote = refs[tracking_ref]
        except KeyError:
            remote = None
            
        
        if local == remote:
            return None
        
        local_object = self.repo.get_object(local)
        
        
        commits = self.get_commits(local_object, [remote])
        objects = self.get_objects(commits)
        print objects
        
        if remote:
            remote_object = self.repo.get_object(remote)
            filtered_objects = self.filter_objects(objects, remote_object)
        else:
            filtered_objects = objects
        
        filtered_objects = set(filtered_objects)
        return filtered_objects
        
        
    def filter_objects(self, objects, old_commit):
        filtered = []
        old_treeId = old_commit.tree
        old_objects = self.get_objects_in_tree(old_treeId)
        for object in objects:
            if object not in old_objects:
                filtered.append(object)
                
        return filtered
        
    def get_commits(self, interesting, uninteresting):
        commits = [interesting]
        remaining = interesting.get_parents()
        
        while remaining:
            pId = remaining.pop(0)
            if pId in uninteresting:
                continue    
            else:
                parent = self.repo.get_object(pId)
                commits.append(parent)
                parents = parent.get_parents()
                remaining.extend(parents)
        return commits
    
    
    def get_objects(self, commits):
        objects = []
        while commits:
            commit = commits.pop(0)
            objects.append(commit)
            objects.extend(self.get_objects_in_tree(commit.tree))
        return objects    
            
            
    
    def get_objects_in_tree(self, treeId):
        objects = []
        tree = self.repo.get_object(treeId)
        objects.append(tree)
        entries = tree.entries()
        for entryId in entries:
            # get the entry's sha 
            objectId = entryId[2]
            object = self.repo.get_object(objectId) 
            if isinstance(object, Tree):
                objects.extend(self.get_objects_in_tree(objectId))
            else:
                objects.append(object)    
        return objects
      
    def generate_pack_name(self, objects):
        m = hashlib.sha1()
        for object in objects:
            sha1 = object.sha().hexdigest()
#            print sha1
            m.update(sha1)
        file_name = m.hexdigest()
#        print 'File Name is ', file_name
        return file_name
    
    
    def write_pack(self, pack_name, objects):
        write_pack('pack-' + pack_name, [(x, "") for x in objects],
                   len(objects))
        
    def find_tracking_ref_names(self, fetch, refs):
        if fetch[0] == '+':
            fetch = fetch[1:]
        tmp = fetch.split(':')
        src = tmp[0]
        dst = tmp[1]
        
        # TODO double check that both src and dst have wild cards, or both don't
        
        # match the source with refs
        if src.endswith('*') and refs.startswith(src[:-1]):
            return self.expand_from_src(src, dst, refs)
        else:
            return dst                                         
            
        
    def expand_from_src(self, src, dst, refs):
        return dst[:-1] + refs[len(src)-1:]
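
The refspec handling in find_tracking_ref_names()/expand_from_src() above is plain string surgery; here is a small worked example (with hypothetical refs) of what the expansion produces:

# '+refs/heads/*:refs/remotes/origin/*' maps a matching source ref onto
# the destination pattern by swapping in the part after the '*'.
src = 'refs/heads/*'
dst = 'refs/remotes/origin/*'
ref = 'refs/heads/master'            # hypothetical ref advertised by the remote
assert ref.startswith(src[:-1])
expanded = dst[:-1] + ref[len(src) - 1:]
assert expanded == 'refs/remotes/origin/master'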
Esempio n. 28
0
class Gits3(object):
    def __init__(self, path):
        self.path = path
        self.open_repo(path)

    def open_repo(self, path):
        self.repo = Repo(path)

    def get_id(self, ref):
        return self.repo.get_refs()[ref]

    def get_updates(self, local_ref, tracking_ref):

        refs = self.repo.get_refs()
        for key, value in refs.iteritems():
            print key, value

        local = refs[local_ref]
        try:
            remote = refs[tracking_ref]
        except KeyError:
            remote = None

        if local == remote:
            return None

        local_object = self.repo.get_object(local)

        commits = self.get_commits(local_object, [remote])
        objects = self.get_objects(commits)
        print objects

        if remote:
            remote_object = self.repo.get_object(remote)
            filtered_objects = self.filter_objects(objects, remote_object)
        else:
            filtered_objects = objects

        filtered_objects = set(filtered_objects)
        return filtered_objects

    def filter_objects(self, objects, old_commit):
        filtered = []
        old_treeId = old_commit.tree
        old_objects = self.get_objects_in_tree(old_treeId)
        for object in objects:
            if object not in old_objects:
                filtered.append(object)

        return filtered

    def get_commits(self, interesting, uninteresting):
        commits = [interesting]
        remaining = interesting.parents

        while remaining:
            pId = remaining.pop(0)
            if pId in uninteresting:
                continue
            else:
                parent = self.repo.get_object(pId)
                commits.append(parent)
                parents = parent.parents
                remaining.extend(parents)
        return commits

    def get_objects(self, commits):
        objects = []
        while commits:
            commit = commits.pop(0)
            objects.append(commit)
            objects.extend(self.get_objects_in_tree(commit.tree))
        return objects

    def get_objects_in_tree(self, treeId):
        objects = []
        tree = self.repo.get_object(treeId)
        objects.append(tree)
        for entryId in tree.items():
            # get the entry's sha
            objectId = entryId[2]
            object = self.repo.get_object(objectId)
            if isinstance(object, Tree):
                objects.extend(self.get_objects_in_tree(objectId))
            else:
                objects.append(object)
        return objects

    def generate_pack_name(self, objects):
        m = hashlib.sha1()
        for object in objects:
            sha1 = object.sha().hexdigest()
            #            print sha1
            m.update(sha1)
        file_name = m.hexdigest()
        #        print 'File Name is ', file_name
        return file_name

    def write_pack(self, pack_name, objects):
        write_pack('pack-' + pack_name, [(x, "") for x in objects],
                   len(objects))

    def find_tracking_ref_names(self, fetch, refs):
        if fetch[0] == '+':
            fetch = fetch[1:]
        tmp = fetch.split(':')
        src = tmp[0]
        dst = tmp[1]

        # TODO double check that both src and dst have wild cards, or both don't

        # match the source with refs
        if src.endswith('*') and refs.startswith(src[:-1]):
            return self.expand_from_src(src, dst, refs)
        else:
            return dst

    def expand_from_src(self, src, dst, refs):
        return dst[:-1] + refs[len(src) - 1:]
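
End to end, the class above might be used roughly like this (repository path and ref names are hypothetical) to collect the objects missing from a tracking ref and write them into a pack file named after their combined SHA-1:

g = Gits3('/tmp/repo')                                   # hypothetical local repository
objects = g.get_updates('refs/heads/master',             # hypothetical local ref
                        'refs/remotes/origin/master')    # hypothetical tracking ref
if objects:
    name = g.generate_pack_name(objects)
    g.write_pack(name, objects)                          # writes 'pack-<name>' on disk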
Esempio n. 29
0
class GitStorage():
    def _ignoreFile(self, dirName, fileName):
        """
        used as the ``ignore`` callback for the shutil.copytree call
        ``dirName``
            the working directory
        ``fileName``
            list of files inside the directory (dirName)
        """
        result = []
        for i in fileName:
            path = dirName + i
            if path not in fileToIgnore:
                result.append(path)
        return result

    def _commit(self, tree):
        """
        commit a tree; used only by __init__
        ``tree``
            tree to commit
        """
        commit = Commit()
        commit.tree = tree.id
        commit.encoding = "UTF-8"
        commit.committer = commit.author = 'debexpo <%s>' % (pylons.config['debexpo.email'])
        commit.commit_time = commit.author_time = int(time())
        tz = parse_timezone('-0200')[0]
        commit.commit_timezone = commit.author_timezone = tz
        commit.message = " "
        self.repo.object_store.add_object(tree)
        self.repo.object_store.add_object(commit)
        self.repo.refs["HEAD"] = commit.id
        log.debug('committing')
        return commit.id

    def __init__(self, path):
        #creating the repository
        if os.path.isdir(path):
            log.debug("directory exist, taking it as a git repository")
            self.repo = Repo(path)
        else:
            log.debug("directory doesn't exist, creating")
            os.makedirs(path)
            log.debug("initiate the repo")
            self.repo = Repo.init(path)
            log.debug("adding an empty tree to the repository")
            self._commit(Tree())

    #only this function will be used on upload
    def change(self, files):
        """
        used to change a file in the git storage; it can also be called for the first upload
        ``files``
            a list of files to change
        """
        if len(files) == 0:
            log.debug("trying to change nothing will do... nothing")
        else:
            log.debug("this will change %i files" % (len(files)))
            for f in files:
                self.repo.stage(str(f))
            log.debug("stages dones")
            self.repo.do_commit("this is so awesome that nobody will never see it",
                committer="same here <*****@*****.**>")

    def buildTreeDiff(self, dest, tree=None, originalTree=None):
        """
        create files from the diff between two trees; used in the code browser
        to get an older version (walking the history)
        ``tree``
            the tree to compare against
        ``dest``
            the destination folder in which to build the sources
        ``originalTree``
            the original tree, by default the latest one

        by default it returns the last changed files

        """
        if tree is None:
            head = self.repo.commit(self.repo.commit(self.repo.head()).parents[0])
            tree = self.repo.tree(head.tree)
        if originalTree is None:
            originalTree = self.repo.tree(self.repo.commit(self.repo.head()).tree)
        blobToBuild = []
        #getting blob that have changed
        for blob in self.repo.object_store.iter_tree_contents(tree.id):
            if blob not in originalTree:
                blobToBuild.append(blob)
                fileToIgnore.append(blob.path)
        repoLocation = os.path.join(str(self.repo).split("'")[1])
        #creating the folder with link to older files
        if os.path.exists(repoLocation + dest):
            log.warning("%s already exist, copy will not work")
        else:
            log.debug("copying files")
            shutil.copytree(repoLocation, repoLocation + dest, symlinks=True, ignore=self._ignoreFile)
        for b in blobToBuild:
            # os.path.split() returns a tuple; we only need the directory part
            fileDirectory = os.path.dirname(b.path)
            if not os.path.exists(os.path.join(repoLocation + dest, fileDirectory)):
                os.makedirs(os.path.join(repoLocation + dest, fileDirectory))
            file = open(os.path.join(repoLocation + dest, b.path), 'w')
            file.write(self.repo.get_object(b.sha).as_raw_string())
            file.close()
        tree = None
        originalTree = None

    #get*
    def getLastTree(self):
        """
        return the last tree
        """
        return self.repo.tree(self.repo._commit(self.repo.head()).tree)

    def getAllTrees(self):
        """
        return trees
        """
        result = []
        commit = self.repo._commit(self.repo.head())
        for c in commit._get_parents():
            result.append(c.tree)
        return result

    def getOlderFileContent(self, file):
        """
        return the older content of the file, i.e. the first version found that differs from it
        ``file``
            the file to work on
        """
        with open(file) as f:
            originalBlob = Blob.from_string("".join(f.readlines()))
        trees = self.getAllTrees()
        tree = None
        for t in trees:
            #parse the trees in order to find the one where the file changed
            if originalBlob not in t:
                tree = t
                break
        #a matching tree must exist, otherwise the file does not come from this package
        if tree is None:
            log.error(
                "there is no tree that contains this blob; this shouldn't happen - "
                "the file does not appear to come from this package")
        else:
            if self.repo._commit(self.repo.head()).tree == tree:
                olderTree = self.repo.commit(self.repo.head())._get_parents()[0].tree
            else:
                for c in self.repo._commit(self.repo.head())._get_parents():
                    if c.tree == tree:
                        try:
                            olderTree = c.get_parents()[0]
                        except IndexError:
                            log.debug("file is the last version")
                            olderTree = tree
            if olderTree != tree:
                #we must check here the blob that contains the older file
                for b in self.repo.object_store.iter_tree_contents(olderTree.id):
                    if originalBlob.path == b.path:
                        #older blob found! the first loop already checked that it differs,
                        # so we can now return the content of the file
                        return self.repo.get_object(b.sha).as_raw_string()
        return ""

    def getOlderCommits(self):
        """
        return a list of all commits
        """
        return self.repo.commit(self.repo.head())._get_parents()
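
The history walking in getOlderFileContent() above boils down to iterating a tree through the object store and reading blobs back. The sketch below is not from the original example: the repository path is hypothetical, and it leans on the same deprecated Repo.commit()/head() helpers the example itself calls.

from dulwich.repo import Repo

repo = Repo('/tmp/storage')                     # hypothetical path
head = repo.commit(repo.head())                 # deprecated helper, as used above
for entry in repo.object_store.iter_tree_contents(head.tree):
    blob = repo.get_object(entry.sha)
    print entry.path, len(blob.as_raw_string())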
Esempio n. 30
0
class GitHandler(object):
    mapfile = 'git-mapfile'
    tagsfile = 'git-tags'

    def __init__(self, dest_repo, ui):
        self.repo = dest_repo
        self.ui = ui

        if ui.configbool('git', 'intree'):
            self.gitdir = self.repo.wjoin('.git')
        else:
            self.gitdir = self.repo.join('git')

        self.paths = ui.configitems('paths')

        self.load_map()
        self.load_tags()

    # make the git data directory
    def init_if_missing(self):
        if os.path.exists(self.gitdir):
            self.git = Repo(self.gitdir)
        else:
            os.mkdir(self.gitdir)
            self.git = Repo.init_bare(self.gitdir)

    ## FILE LOAD AND SAVE METHODS

    def map_set(self, gitsha, hgsha):
        self._map_git[gitsha] = hgsha
        self._map_hg[hgsha] = gitsha

    def map_hg_get(self, gitsha):
        return self._map_git.get(gitsha)

    def map_git_get(self, hgsha):
        return self._map_hg.get(hgsha)

    def load_map(self):
        self._map_git = {}
        self._map_hg = {}
        if os.path.exists(self.repo.join(self.mapfile)):
            for line in self.repo.opener(self.mapfile):
                gitsha, hgsha = line.strip().split(' ', 1)
                self._map_git[gitsha] = hgsha
                self._map_hg[hgsha] = gitsha

    def save_map(self):
        file = self.repo.opener(self.mapfile, 'w+', atomictemp=True)
        for hgsha, gitsha in sorted(self._map_hg.iteritems()):
            file.write("%s %s\n" % (gitsha, hgsha))
        file.rename()

    def load_tags(self):
        self.tags = {}
        if os.path.exists(self.repo.join(self.tagsfile)):
            for line in self.repo.opener(self.tagsfile):
                sha, name = line.strip().split(' ', 1)
                self.tags[name] = sha

    def save_tags(self):
        file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True)
        for name, sha in sorted(self.tags.iteritems()):
            if not self.repo.tagtype(name) == 'global':
                file.write("%s %s\n" % (sha, name))
        file.rename()

    ## END FILE LOAD AND SAVE METHODS

    ## COMMANDS METHODS

    def import_commits(self, remote_name):
        self.import_git_objects(remote_name)
        self.save_map()

    def fetch(self, remote, heads):
        self.export_commits()
        refs = self.fetch_pack(remote, heads)
        remote_name = self.remote_name(remote)

        if refs:
            self.import_git_objects(remote_name, refs)
            self.import_tags(refs)
            self.update_hg_bookmarks(refs)
            if remote_name:
                self.update_remote_branches(remote_name, refs)
            elif not self.paths:
                # initial cloning
                self.update_remote_branches('default', refs)
        else:
            self.ui.status(_("nothing new on the server\n"))

        self.save_map()

    def export_commits(self):
        try:
            self.export_git_objects()
            self.export_hg_tags()
            self.update_references()
        finally:
            self.save_map()

    def get_refs(self, remote):
        self.export_commits()
        client, path = self.get_transport_and_path(remote)
        old_refs = {}
        new_refs = {}

        def changed(refs):
            old_refs.update(refs)
            to_push = set(self.local_heads().values() + self.tags.values())
            new_refs.update(self.get_changed_refs(refs, to_push, True))
            # don't push anything
            return {}

        try:
            client.send_pack(path, changed, None)

            changed_refs = [
                ref for ref, sha in new_refs.iteritems()
                if sha != old_refs.get(ref)
            ]
            new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs]
            old = dict((bin(self.map_hg_get(old_refs[r])), 1)
                       for r in changed_refs if r in old_refs)

            return old, new
        except HangupException:
            raise hgutil.Abort("the remote end hung up unexpectedly")

    def push(self, remote, revs, force):
        self.export_commits()
        changed_refs = self.upload_pack(remote, revs, force)
        remote_name = self.remote_name(remote)

        if remote_name and changed_refs:
            for ref, sha in changed_refs.iteritems():
                self.ui.status("    %s::%s => GIT:%s\n" %
                               (remote_name, ref, sha[0:8]))

            self.update_remote_branches(remote_name, changed_refs)

    def clear(self):
        mapfile = self.repo.join(self.mapfile)
        if os.path.exists(self.gitdir):
            for root, dirs, files in os.walk(self.gitdir, topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))
            os.rmdir(self.gitdir)
        if os.path.exists(mapfile):
            os.remove(mapfile)

    ## CHANGESET CONVERSION METHODS

    def export_git_objects(self):
        self.ui.status(_("importing Hg objects into Git\n"))
        self.init_if_missing()

        nodes = [self.repo.lookup(n) for n in self.repo]
        export = [node for node in nodes if not hex(node) in self._map_hg]
        total = len(export)
        for i, rev in enumerate(export):
            util.progress(self.ui, 'importing', i, total=total)
            ctx = self.repo.changectx(rev)
            state = ctx.extra().get('hg-git', None)
            if state == 'octopus':
                self.ui.debug("revision %d is a part "
                              "of octopus explosion\n" % ctx.rev())
                continue
            self.export_hg_commit(rev)
        util.progress(self.ui, 'importing', None, total=total)

    # convert this commit into git objects
    # go through the manifest, convert all blobs/trees we don't have
    # write the commit object (with metadata info)
    def export_hg_commit(self, rev):
        self.ui.note(_("converting revision %s\n") % hex(rev))

        oldenc = self.swap_out_encoding()

        ctx = self.repo.changectx(rev)
        extra = ctx.extra()

        commit = Commit()

        (time, timezone) = ctx.date()
        commit.author = self.get_git_author(ctx)
        commit.author_time = int(time)
        commit.author_timezone = -timezone

        if 'committer' in extra:
            # fixup timezone
            (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2)
            commit.committer = name
            commit.commit_time = timestamp

            # work around a timezone format change
            if int(timezone) % 60 != 0:  #pragma: no cover
                timezone = parse_timezone(timezone)
                # Newer versions of Dulwich return a tuple here
                if isinstance(timezone, tuple):
                    timezone, neg_utc = timezone
                    commit._commit_timezone_neg_utc = neg_utc
            else:
                timezone = -int(timezone)
            commit.commit_timezone = timezone
        else:
            commit.committer = commit.author
            commit.commit_time = commit.author_time
            commit.commit_timezone = commit.author_timezone

        commit.parents = []
        for parent in self.get_git_parents(ctx):
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if git_sha:
                commit.parents.append(git_sha)

        commit.message = self.get_git_message(ctx)

        if 'encoding' in extra:
            commit.encoding = extra['encoding']

        tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx))
        commit.tree = tree_sha

        self.git.object_store.add_object(commit)
        self.map_set(commit.id, ctx.hex())

        self.swap_out_encoding(oldenc)
        return commit.id

    def get_git_author(self, ctx):
        # hg authors might not have emails
        author = ctx.user()

        # check for git author pattern compliance
        regex = re.compile('^(.*?) \<(.*?)\>(.*)$')
        a = regex.match(author)

        if a:
            name = a.group(1)
            email = a.group(2)
            if len(a.group(3)) > 0:
                name += ' ext:(' + urllib.quote(a.group(3)) + ')'
            author = name + ' <' + email + '>'
        else:
            author = author + ' <none@none>'

        if 'author' in ctx.extra():
            author = "".join(apply_delta(author, ctx.extra()['author']))

        return author

    def get_git_parents(self, ctx):
        def is_octopus_part(ctx):
            return ctx.extra().get('hg-git',
                                   None) in ('octopus', 'octopus-done')

        parents = []
        if ctx.extra().get('hg-git', None) == 'octopus-done':
            # implode octopus parents
            part = ctx
            while is_octopus_part(part):
                (p1, p2) = part.parents()
                assert not is_octopus_part(p1)
                parents.append(p1)
                part = p2
            parents.append(p2)
        else:
            parents = ctx.parents()

        return parents

    def get_git_message(self, ctx):
        extra = ctx.extra()

        message = ctx.description() + "\n"
        if 'message' in extra:
            message = "".join(apply_delta(message, extra['message']))

        # HG EXTRA INFORMATION
        add_extras = False
        extra_message = ''
        if not ctx.branch() == 'default':
            add_extras = True
            extra_message += "branch : " + ctx.branch() + "\n"

        renames = []
        for f in ctx.files():
            if f not in ctx.manifest():
                continue
            rename = ctx.filectx(f).renamed()
            if rename:
                renames.append((rename[0], f))

        if renames:
            add_extras = True
            for oldfile, newfile in renames:
                extra_message += "rename : " + oldfile + " => " + newfile + "\n"

        for key, value in extra.iteritems():
            if key in ('author', 'committer', 'encoding', 'message', 'branch',
                       'hg-git'):
                continue
            else:
                add_extras = True
                extra_message += "extra : " + key + " : " + urllib.quote(
                    value) + "\n"

        if add_extras:
            message += "\n--HG--\n" + extra_message

        return message

    def iterblobs(self, ctx):
        for f in ctx:
            fctx = ctx[f]
            blobid = self.map_git_get(hex(fctx.filenode()))

            if not blobid:
                blob = Blob.from_string(fctx.data())
                self.git.object_store.add_object(blob)
                self.map_set(blob.id, hex(fctx.filenode()))
                blobid = blob.id

            if 'l' in ctx.flags(f):
                mode = 0120000
            elif 'x' in ctx.flags(f):
                mode = 0100755
            else:
                mode = 0100644

            yield f, blobid, mode

    def import_git_objects(self, remote_name=None, refs=None):
        self.ui.status(_("importing Git objects into Hg\n"))
        self.init_if_missing()

        # import heads and fetched tags as remote references
        todo = []
        done = set()
        convert_list = {}

        # get a list of all the head shas
        seenheads = set()
        if refs is None:
            refs = self.git.refs.as_dict()
        if refs:
            for sha in refs.itervalues():
                # refs contains all the refs in the server, not just the ones
                # we are pulling
                if sha in self.git.object_store:
                    obj = self.git.get_object(sha)
                    while isinstance(obj, Tag):
                        obj_type, sha = obj.object
                        obj = self.git.get_object(sha)
                    if isinstance(obj, Commit) and sha not in seenheads:
                        seenheads.add(sha)
                        todo.append(sha)

        # sort by commit date
        def commitdate(sha):
            obj = self.git.get_object(sha)
            return obj.commit_time - obj.commit_timezone

        todo.sort(key=commitdate, reverse=True)

        # traverse the heads getting a list of all the unique commits
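        # (iterative post-order walk: a commit is appended only once all of
        # its parents are done, so the import loop below sees parents first)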
        commits = []
        seen = set(todo)
        while todo:
            sha = todo[-1]
            if sha in done:
                todo.pop()
                continue
            assert isinstance(sha, str)
            obj = self.git.get_object(sha)
            assert isinstance(obj, Commit)
            for p in obj.parents:
                if p not in done:
                    todo.append(p)
                    break
            else:
                commits.append(sha)
                convert_list[sha] = obj
                done.add(sha)
                todo.pop()

        commits = [commit for commit in commits if commit not in self._map_git]
        # import each of the commits, oldest first
        total = len(commits)
        for i, csha in enumerate(commits):
            util.progress(self.ui, 'importing', i, total=total, unit='commits')
            commit = convert_list[csha]
            self.import_git_commit(commit)
        util.progress(self.ui, 'importing', None, total=total, unit='commits')

    def import_git_commit(self, commit):
        self.ui.debug(_("importing: %s\n") % commit.id)

        (strip_message, hg_renames, hg_branch,
         extra) = self.extract_hg_metadata(commit.message)

        # get a list of the changed, added, removed files
        files = self.get_files_changed(commit)

        date = (commit.author_time, -commit.author_timezone)
        text = strip_message

        origtext = text
        try:
            text.decode('utf-8')
        except UnicodeDecodeError:
            text = self.decode_guess(text, commit.encoding)

        text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
        if text + '\n' != origtext:
            extra['message'] = create_delta(text + '\n', origtext)

        author = commit.author

        # convert extra data back to the end
        if ' ext:' in commit.author:
            regex = re.compile(r'^(.*?) ext:\((.*)\) <(.*)>$')
            m = regex.match(commit.author)
            if m:
                name = m.group(1)
                ex = urllib.unquote(m.group(2))
                email = m.group(3)
                author = name + ' <' + email + '>' + ex

        if ' <none@none>' in commit.author:
            author = commit.author[:-12]

        try:
            author.decode('utf-8')
        except UnicodeDecodeError:
            origauthor = author
            author = self.decode_guess(author, commit.encoding)
            extra['author'] = create_delta(author, origauthor)

        oldenc = self.swap_out_encoding()

        def getfilectx(repo, memctx, f):
            delete, mode, sha = files[f]
            if delete:
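                # memctx's callback API signals a removed file by raising
                # IOError rather than returning a filectx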
                raise IOError

            data = self.git[sha].data
            copied_path = hg_renames.get(f)
            e = self.convert_git_int_mode(mode)

            return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

        gparents = map(self.map_hg_get, commit.parents)
        p1, p2 = (nullid, nullid)
        octopus = False

        if len(gparents) > 1:
            # merge, possibly octopus
            def commit_octopus(p1, p2):
                ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                                     getfilectx, author, date,
                                     {'hg-git': 'octopus'})
                return hex(self.repo.commitctx(ctx))

            octopus = len(gparents) > 2
            p2 = gparents.pop()
            p1 = gparents.pop()
            while len(gparents) > 0:
                p2 = commit_octopus(p1, p2)
                p1 = gparents.pop()
        else:
            if gparents:
                p1 = gparents.pop()

        pa = None
        if not (p2 == nullid):
            node1 = self.repo.changectx(p1)
            node2 = self.repo.changectx(p2)
            pa = node1.ancestor(node2)

        # if named branch, add to extra
        if hg_branch:
            extra['branch'] = hg_branch

        # if committer is different than author, add it to extra
        if commit.author != commit.committer \
               or commit.author_time != commit.commit_time \
               or commit.author_timezone != commit.commit_timezone:
            extra['committer'] = "%s %d %d" % (
                commit.committer, commit.commit_time, -commit.commit_timezone)

        if commit.encoding:
            extra['encoding'] = commit.encoding

        if octopus:
            extra['hg-git'] = 'octopus-done'

        # TODO use 'n in self.repo' when we require hg 1.5
        def repo_contains(n):
            try:
                return bool(self.repo.lookup(n))
            except error.RepoLookupError:
                return False

        if not (repo_contains(p1) and repo_contains(p2)):
            raise hgutil.Abort(
                _('you appear to have run strip - '
                  'please run hg git-cleanup'))
        ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                             getfilectx, author, date, extra)

        node = self.repo.commitctx(ctx)

        self.swap_out_encoding(oldenc)

        # save changeset to mapping file
        cs = hex(node)
        self.map_set(commit.id, cs)

    ## PACK UPLOADING AND FETCHING

    def upload_pack(self, remote, revs, force):
        client, path = self.get_transport_and_path(remote)

        def changed(refs):
            to_push = revs or set(self.local_heads().values() +
                                  self.tags.values())
            return self.get_changed_refs(refs, to_push, force)

        genpack = self.git.object_store.generate_pack_contents
        try:
            self.ui.status(_("creating and sending data\n"))
            changed_refs = client.send_pack(path, changed, genpack)
            return changed_refs
        except HangupException:
            raise hgutil.Abort("the remote end hung up unexpectedly")

    def get_changed_refs(self, refs, revs, force):
        new_refs = refs.copy()

        # The remote repo is empty and the local one doesn't have bookmarks/tags
        if refs.keys()[0] == 'capabilities^{}':
            del new_refs['capabilities^{}']
            if not self.local_heads():
                tip = hex(self.repo.lookup('tip'))
                bookmarks.bookmark(self.ui,
                                   self.repo,
                                   'master',
                                   tip,
                                   force=True)
                bookmarks.setcurrent(self.repo, 'master')
                new_refs['refs/heads/master'] = self.map_git_get(tip)

        for rev in revs:
            ctx = self.repo[rev]
            if getattr(ctx, 'bookmarks', None):
                labels = lambda c: ctx.tags() + ctx.bookmarks()
            else:
                labels = lambda c: ctx.tags()
            prep = lambda itr: [i.replace(' ', '_') for i in itr]

            heads = [t for t in prep(labels(ctx)) if t in self.local_heads()]
            tags = [t for t in prep(labels(ctx)) if t in self.tags]

            if not (heads or tags):
                raise hgutil.Abort("revision %s cannot be pushed since"
                                   " it doesn't have a ref" % ctx)

            # Check if the tags the server is advertising are annotated tags,
            # by attempting to retrieve them from our git repo, and building a
            # list of these tags.
            #
            # This is possible, even though (currently) annotated tags are
            # dereferenced and stored as lightweight ones, as the annotated tag
            # is still stored in the git repo.
            uptodate_annotated_tags = []
            for r in tags:
                ref = 'refs/tags/' + r
                # Check tag.
                if ref not in refs:
                    continue
                try:
                    # We're not using Repo.tag(), as it's deprecated.
                    tag = self.git.get_object(refs[ref])
                    if not isinstance(tag, Tag):
                        continue
                except KeyError:
                    continue

                # If we've reached here, the tag's good.
                uptodate_annotated_tags.append(ref)

            for r in heads + tags:
                if r in heads:
                    ref = 'refs/heads/' + r
                else:
                    ref = 'refs/tags/' + r

                if ref not in refs:
                    new_refs[ref] = self.map_git_get(ctx.hex())
                elif new_refs[ref] in self._map_git:
                    rctx = self.repo[self.map_hg_get(new_refs[ref])]
                    if rctx.ancestor(ctx) == rctx or force:
                        new_refs[ref] = self.map_git_get(ctx.hex())
                    else:
                        raise hgutil.Abort("pushing %s overwrites %s" %
                                           (ref, ctx))
                elif ref in uptodate_annotated_tags:
                    # we already have the annotated tag.
                    pass
                else:
                    raise hgutil.Abort("%s changed on the server, please pull "
                                       "and merge before pushing" % ref)

        return new_refs

    def fetch_pack(self, remote_name, heads):
        client, path = self.get_transport_and_path(remote_name)
        graphwalker = self.git.get_graph_walker()

        def determine_wants(refs):
            if heads:
                want = []
                # contains pairs of ('refs/(heads|tags|...)/foo', 'foo')
                # if ref is just '<foo>', then we get ('foo', 'foo')
                stripped_refs = [
                    (r, r[r.find('/', r.find('/') + 1) + 1:])
                    for r in refs
                ]
                for h in heads:
                    r = [pair[0] for pair in stripped_refs if pair[1] == h]
                    if not r:
                        raise hgutil.Abort(
                            "ref %s not found on remote server" % h)
                    elif len(r) == 1:
                        want.append(refs[r[0]])
                    else:
                        raise hgutil.Abort("ambiguous reference %s: %r" %
                                           (h, r))
            else:
                want = [
                    sha for ref, sha in refs.iteritems()
                    if not ref.endswith('^{}')
                ]
            return want

        f, commit = self.git.object_store.add_pack()
        try:
            try:
                return client.fetch_pack(path, determine_wants, graphwalker,
                                         f.write, self.ui.status)
            except HangupException:
                raise hgutil.Abort("the remote end hung up unexpectedly")
        finally:
            commit()

    ## REFERENCES HANDLING

    def update_references(self):
        heads = self.local_heads()

        # Create a local Git branch name for each
        # Mercurial bookmark.
        for key in heads:
            self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key])

    def export_hg_tags(self):
        for tag, sha in self.repo.tags().iteritems():
            if self.repo.tagtype(tag) in ('global', 'git'):
                tag = tag.replace(' ', '_')
                self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha))
                self.tags[tag] = hex(sha)

    def local_heads(self):
        try:
            if getattr(bookmarks, 'parse', None):
                bms = bookmarks.parse(self.repo)
            else:
                bms = self.repo._bookmarks
            return dict([(bm, hex(bms[bm])) for bm in bms])
        except AttributeError:  #pragma: no cover
            return {}

    def import_tags(self, refs):
        keys = refs.keys()
        if not keys:
            return
        for k in keys[:]:
            ref_name = k
            parts = k.split('/')
            if parts[0] == 'refs' and parts[1] == 'tags':
                ref_name = "/".join([v for v in parts[2:]])
                # refs contains all the refs in the server, not just
                # the ones we are pulling
                if refs[k] not in self.git.object_store:
                    continue
                if ref_name[-3:] == '^{}':
                    ref_name = ref_name[:-3]
                if ref_name not in self.repo.tags():
                    obj = self.git.get_object(refs[k])
                    sha = None
                    if isinstance(obj, Commit):  # lightweight
                        sha = self.map_hg_get(refs[k])
                        self.tags[ref_name] = sha
                    elif isinstance(obj, Tag):  # annotated
                        (obj_type, obj_sha) = obj.object
                        obj = self.git.get_object(obj_sha)
                        if isinstance(obj, Commit):
                            sha = self.map_hg_get(obj_sha)
                            # TODO: better handling for annotated tags
                            self.tags[ref_name] = sha
        self.save_tags()

    def update_hg_bookmarks(self, refs):
        try:
            oldbm = getattr(bookmarks, 'parse', None)
            if oldbm:
                bms = bookmarks.parse(self.repo)
            else:
                bms = self.repo._bookmarks
            heads = dict([(ref[11:], refs[ref]) for ref in refs
                          if ref.startswith('refs/heads/')])

            for head, sha in heads.iteritems():
                # refs contains all the refs in the server, not just
                # the ones we are pulling
                if sha not in self.git.object_store:
                    continue
                hgsha = bin(self.map_hg_get(sha))
                if head not in bms:
                    # new branch
                    bms[head] = hgsha
                else:
                    bm = self.repo[bms[head]]
                    if bm.ancestor(self.repo[hgsha]) == bm:
                        # fast forward
                        bms[head] = hgsha
            if heads:
                if oldbm:
                    bookmarks.write(self.repo, bms)
                else:
                    self.repo._bookmarks = bms
                    bookmarks.write(self.repo)

        except AttributeError:
            self.ui.warn(
                _('creating bookmarks failed, do you have'
                  ' bookmarks enabled?\n'))

    def update_remote_branches(self, remote_name, refs):
        tagfile = self.repo.join('git-remote-refs')
        tags = self.repo.gitrefs()
        # since we re-write all refs for this remote each time, prune
        # all entries matching this remote from our tags list now so
        # that we avoid any stale refs hanging around forever
        for t in list(tags):
            if t.startswith(remote_name + '/'):
                del tags[t]
        tags = dict((k, hex(v)) for k, v in tags.iteritems())
        store = self.git.object_store
        for ref_name, sha in refs.iteritems():
            if ref_name.startswith('refs/heads'):
                if sha not in store:
                    continue
                hgsha = self.map_hg_get(sha)
                head = ref_name[11:]
                tags['/'.join((remote_name, head))] = hgsha
                # TODO(durin42): what is this doing?
                new_ref = 'refs/remotes/%s/%s' % (remote_name, head)
                self.git.refs[new_ref] = sha
            elif (ref_name.startswith('refs/tags')
                  and not ref_name.endswith('^{}')):
                self.git.refs[ref_name] = sha

        tf = open(tagfile, 'wb')
        for tag, node in tags.iteritems():
            tf.write('%s %s\n' % (node, tag))
        tf.close()

    ## UTILITY FUNCTIONS

    def convert_git_int_mode(self, mode):
        # TODO: make these into constants
        convert = {0100644: '', 0100755: 'x', 0120000: 'l'}
        if mode in convert:
            return convert[mode]
        return ''

    def extract_hg_metadata(self, message):
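        # parse the "--HG--" trailer written by get_git_message(); e.g. a
        # trailer such as (values illustrative only):
        #   --HG--
        #   branch : stable
        #   rename : old.txt => new.txt
        #   extra : close : 1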
        split = message.split("\n--HG--\n", 1)
        renames = {}
        extra = {}
        branch = False
        if len(split) == 2:
            message, meta = split
            lines = meta.split("\n")
            for line in lines:
                if line == '':
                    continue

                command, data = line.split(" : ", 1)

                if command == 'rename':
                    before, after = data.split(" => ", 1)
                    renames[after] = before
                if command == 'branch':
                    branch = data
                if command == 'extra':
                    before, after = data.split(" : ", 1)
                    extra[before] = urllib.unquote(after)
        return (message, renames, branch, extra)

    def get_file(self, commit, f):
        otree = self.git.tree(commit.tree)
        parts = f.split('/')
        for part in parts:
            (mode, sha) = otree[part]
            obj = self.git.get_object(sha)
            if isinstance(obj, Blob):
                return (mode, sha, obj._text)
            elif isinstance(obj, Tree):
                otree = obj

    def get_files_changed(self, commit):
        tree = commit.tree
        btree = None

        if commit.parents:
            btree = self.git[commit.parents[0]].tree

        changes = self.git.object_store.tree_changes(btree, tree)
        files = {}
        for (oldfile, newfile), (oldmode, newmode), (oldsha,
                                                     newsha) in changes:
            # don't create new submodules
            if newmode == 0160000:
                if oldfile:
                    # become a regular delete
                    newfile, newmode = None, None
                else:
                    continue
            # so old submodules shouldn't exist
            if oldmode == 0160000:
                if newfile:
                    # become a regular add
                    oldfile, oldmode = None, None
                else:
                    continue

            if newfile is None:
                file = oldfile
                delete = True
            else:
                file = newfile
                delete = False

            files[file] = (delete, newmode, newsha)

        return files

    def remote_name(self, remote):
        names = [name for name, path in self.paths if path == remote]
        if names:
            return names[0]

    # Stolen from hgsubversion
    def swap_out_encoding(self, new_encoding='UTF-8'):
        try:
            from mercurial import encoding
            old = encoding.encoding
            encoding.encoding = new_encoding
        except ImportError:
            old = hgutil._encoding
            hgutil._encoding = new_encoding
        return old

    def decode_guess(self, string, encoding):
        # text is not valid utf-8, try to make sense of it
        if encoding:
            try:
                return string.decode(encoding).encode('utf-8')
            except UnicodeDecodeError:
                pass

        try:
            return string.decode('latin-1').encode('utf-8')
        except UnicodeDecodeError:
            return string.decode('ascii', 'replace').encode('utf-8')

    def get_transport_and_path(self, uri):
        # pass hg's ui.ssh config to dulwich
        if not issubclass(client.get_ssh_vendor, _ssh.SSHVendor):
            client.get_ssh_vendor = _ssh.generate_ssh_vendor(self.ui)

        for handler, transport in (("git://", client.TCPGitClient),
                                   ("git@", client.SSHGitClient),
                                   ("git+ssh://", client.SSHGitClient)):
            if uri.startswith(handler):
                # We need to split around : or /, whatever comes first
                hostpath = uri[len(handler):]
                if (hostpath.find(':') > 0 and hostpath.find('/') > 0):
                    # we have both, whatever is first wins.
                    if hostpath.find(':') < hostpath.find('/'):
                        hostpath_seper = ':'
                    else:
                        hostpath_seper = '/'
                elif hostpath.find(':') > 0:
                    hostpath_seper = ':'
                else:
                    hostpath_seper = '/'

                host, path = hostpath.split(hostpath_seper, 1)
                if hostpath_seper == '/':
                    transportpath = '/' + path
                else:
                    transportpath = path
                return transport(host, thin_packs=False), transportpath
        # if it's not git or git+ssh, try a local url.
        return client.SubprocessGitClient(thin_packs=False), uri
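The export path above boils down to a handful of dulwich calls. The following
is a minimal, hedged sketch (not part of the handler above; it assumes
dulwich's MemoryObjectStore, dulwich.index.commit_tree and dulwich.objects,
and uses made-up file names and contents) of building a tree from
(path, blob id, mode) triples like those yielded by iterblobs() and wrapping
it in a commit:

import time

from dulwich.index import commit_tree
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Commit, parse_timezone

store = MemoryObjectStore()

# iterblobs() yields (path, blob sha, mode) triples; build two by hand here
entries = []
for path, data, mode in [('README', 'hello\n', 0100644),
                         ('run.sh', '#!/bin/sh\necho hi\n', 0100755)]:
    blob = Blob.from_string(data)
    store.add_object(blob)
    entries.append((path, blob.id, mode))

# commit_tree() writes the intermediate tree objects and returns the root tree
tree_sha = commit_tree(store, entries)

commit = Commit()
commit.tree = tree_sha
commit.parents = []
commit.author = commit.committer = 'Example Author <author@example.org>'
commit.commit_time = commit.author_time = int(time.time())
commit.commit_timezone = commit.author_timezone = parse_timezone('+0000')[0]
commit.encoding = 'UTF-8'
commit.message = 'sketch: tree built from (path, sha, mode) triples'
store.add_object(commit)

print commit.id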
Esempio n. 31
0
class Repo(object):
    """
    Wrapper around a dulwich Repository that knows:

    * How to get all the files in the repository
    * How to get the oid of HEAD
    * How to get the commit times of the files we want commit times for

    It's written with speed in mind, given the constraints of writing
    performant code in Python.
    """
    def __init__(self, root_folder):
        self.git = Repository(root_folder)

    def all_files(self):
        """Return a set of all the files under git control"""
        return set([entry.decode() for entry, _ in self.git.open_index().items()])

    @property
    def first_commit(self):
        """Return the oid of HEAD"""
        return self.git.head().decode()

    def file_commit_times(self, use_files_paths, debug=False):
        """
        Traverse the commits in the repository, starting from HEAD until we have
        found the commit times for all the files we care about.

        Yield each file once, only when it is found to be changed in some commit.

        If debug is true, also log (via log.debug) how fast we are going
        through commits (commits/second, reported every 1000 commits and every
        100000 commits).
        """
        prefixes = PrefixTree()
        prefixes.fill(use_files_paths)

        for entry in self.git.get_walker():
            # Commit time taking into account the timezone
            commit_time = entry.commit.commit_time - entry.commit.commit_timezone

            # Get us the two different tree structures between parents and current
            cf_and_pf, changes = self.tree_structures_for(
                (),
                entry.commit.tree,
                [self.git.get_object(oid).tree for oid in entry.commit.parents],
                prefixes,
            )

            # Deep dive into any differences
            difference = []
            if changes:
                cfs_and_pfs = [(cf_and_pf, changes)]
                while cfs_and_pfs:
                    nxt, changes = cfs_and_pfs.pop(0)
                    for thing, changes, is_path in self.differences_between(nxt[0], nxt[1], changes, prefixes):
                        if is_path:
                            found = prefixes.remove(thing[:-1], thing[-1])
                            if found:
                                difference.append('/'.join(thing))
                        else:
                            cfs_and_pfs.append((thing, changes))

            # Only yield if there was a difference
            if difference:
                yield entry.commit.sha().hexdigest(), commit_time, difference

            # If nothing remains, then break!
            if not prefixes:
                break

    def entries_in_tree_oid(self, prefix, tree_oid):
        """Find the tree at this oid and return entries prefixed with ``prefix``"""
        try:
            tree = self.git.get_object(tree_oid)
        except KeyError:
            log.warning("Couldn't find object {0}".format(tree_oid))
            return empty
        else:
            return frozenset(self.entries_in_tree(prefix, tree))

    def entries_in_tree(self, prefix, tree):
        """
        Traverse the entries in this tree and yield (prefix, is_tree, oid)

        Where prefix is a tuple of the given prefix and the name of the entry.
        """
        for entry in tree.items():
            if prefix:
                new_prefix = prefix + (entry.path.decode(), )
            else:
                new_prefix = (entry.path.decode(), )

            yield (new_prefix, stat.S_ISDIR(entry.mode), entry.sha)

    def tree_structures_for(self, prefix, current_oid, parent_oids, prefixes):
        """
        Return the entries for this commit, the entries of the parent commits,
        and the difference between the two (current_files - parent_files)
        """
        if prefix and prefixes and prefix not in prefixes:
            return empty, empty

        parent_files = set()
        for oid in parent_oids:
            parent_files.update(self.entries_in_tree_oid(prefix, oid))

        current_files = self.entries_in_tree_oid(prefix, current_oid)
        return (current_files, parent_files), (current_files - parent_files)

    def differences_between(self, current_files, parent_files, changes, prefixes):
        """
        yield (thing, changes, is_path)

        If is_path is true, changes is None and thing is the path as a tuple.

        If is_path is false, thing is the current_files and parent_files for
        that changed treeentry and changes is the difference between current_files
        and parent_files.

        The code here is written to squeeze as much performance as possible out
        of this operation.
        """
        parent_oid = None

        if any(is_tree for _, is_tree, _ in changes):
            if len(changes) == 1:
                wanted_path = list(changes)[0][0]
                parent_oid = frozenset([oid for path, is_tree, oid in parent_files if path == wanted_path and is_tree])
            else:
                parent_values = defaultdict(set)
                parent_changes = parent_files - current_files
                for path, is_tree, oid in parent_changes:
                    if is_tree:
                        parent_values[path].add(oid)

        for path, is_tree, oid in changes:
            if is_tree and path not in prefixes:
                continue

            if not is_tree:
                yield path, None, True
            else:
                parent_oids = parent_oid if parent_oid is not None else parent_values.get(path, empty)
                cf_and_pf, changes = self.tree_structures_for(path, oid, parent_oids, prefixes)
                if changes:
                    yield cf_and_pf, changes, False
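For comparison, dulwich's walker can also report per-commit changes directly;
a hedged sketch (assuming a local repository path and a dulwich version whose
WalkEntry exposes changes()) that is much shorter, if slower, than the
PrefixTree approach above:

from dulwich.repo import Repo

repo = Repo('.')  # assumed: run from inside some git checkout

for entry in repo.get_walker():
    commit = entry.commit
    # commit time adjusted by the recorded timezone, as in the class above
    commit_time = commit.commit_time - commit.commit_timezone

    changes = entry.changes()
    if changes and isinstance(changes[0], list):
        # merge commits yield one list of changes per parent; flatten them
        changes = [c for sub in changes for c in sub]

    changed = set()
    for change in changes:
        # deletions carry their path on the "old" side only
        if change.new.path:
            changed.add(change.new.path)
        elif change.old.path:
            changed.add(change.old.path)

    if changed:
        print commit.id, commit_time, sorted(changed)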
Esempio n. 32
0
    def edit(self, content=None, username=None, *virtual_path, **keywords):
        '''id: the git id of the blob before it was edited
        branch: master (default)'''
        #setup the defaults
        branch = "master"
        url = ManagedPath(virtual_path)
        if "branch" in keywords: branch = keywords["branch"]
        sha = self.sha
        print "sha is: ", sha
        print "keywords: ", keywords

        commit = sha
        print "content is: ", content
        print "self.filename is: ", self.filename
        if content is None:
            repo = Repo(self.package.path())
            set_head(repo, branch)
            if not sha:
                print "repo.head() = ", repo.head()
                sha = dulwich.object_store.tree_lookup_path(repo.get_object, repo.get_object(repo.head()).tree, self.filename)[1]
            obj = repo.get_object(sha)
            
            contents = obj.as_pretty_string()
            return_contents = "<form action=\"" + cherrypy.url() + "\" method=\"POST\">"
            return_contents = return_contents + "<textarea name=content rows='20' cols='120'>" + contents + "</textarea><br />"
            #if the user isn't logged in ...
            if not hasattr(cherrypy.session, "login"): return_contents = return_contents + "username: <input type=text name=username value=\"anonymous\"><br />"
            if sha: return_contents = return_contents + "<input type=hidden name=id value=\"" + sha + "\">"
            return_contents = return_contents + "<input type=hidden name=\"branch\" value=\"" + branch + "\">"
            return_contents = return_contents + "<input type=submit name=submit value=edit></form>"
            self.content = return_contents
            self.branch = branch
            return self.respond()
        elif (sha or branch): #it's been edited
            if username is None and hasattr(cherrypy.session, "login"):
                if cherrypy.session.login is None:
                    raise ValueError("FileViewer.edit: no username supplied")
            elif username is None or username == "anonymous":
                anon = True #whether or not the user is anonymous
                if SESSION_KEY in cherrypy.session.keys():
                    username = cherrypy.session[SESSION_KEY].username
                    anon = False
                else: username = "******"

                #at least until we get access control lists working
                if anon:
                    if branch=="master": #don't let anonymous users modify branch "master"
                        branch = "anonymous"
                    branch = "anonymous"
                
                #make the blob
                blob = Blob.from_string(content)
                
                repo = Repo(self.package.path())
                #change to the right branch
                last_head = repo.head()
                set_head(repo, "master")
                last_commit = repo.get_object(repo.head())
                tree = repo.tree(repo.get_object(repo.head()).tree)

                #set the file
                tree[self.filename] = (0100644, blob.id)
                
                #make the commit
                commit = Commit()
                commit.tree = tree.id
                commit.parents = [last_head]
                commit.author = commit.committer = username
                commit.commit_time = commit.author_time = int(time.time())
                # parse_timezone returns a tuple; keep only the offset
                commit.commit_timezone = commit.author_timezone = parse_timezone("-0600")[0]
                commit.encoding = "UTF-8"
                commit.message = "not implemented yet"

                repo.object_store.add_object(blob)
                repo.object_store.add_object(tree)
                repo.object_store.add_object(commit)
                repo.refs["refs/heads/" + branch] = commit.id
                repo.refs["HEAD"] = "ref: refs/heads/" + branch
                new_link = "<a href=\"/package/" + self.package.name + ":" + branch + "/" + self.filename + "/" + blob.id + "\">go to the new version</a>"
                self.new_link = new_link

            self.content = add_newlines("edited (name=%s, branch=%s, sha=%s) new link: %s\n\n\n" % (username, branch, sha, new_link))
            self.content = self.content + "<pre>" + content + "</pre>"
            self.branch = branch

            return self.respond()
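As a footnote to the tree_lookup_path() call used above, a small hedged
sketch (it assumes an existing checkout and a top-level file named 'README',
purely for illustration) of resolving a path at HEAD to its blob:

from dulwich.repo import Repo
from dulwich.object_store import tree_lookup_path

repo = Repo('.')                     # assumed: some existing git checkout
head = repo.get_object(repo.head())  # the commit object HEAD points at

# walk the tree one path component at a time; returns (mode, blob sha),
# much like the get_file() helper in the hg-git example above
mode, sha = tree_lookup_path(repo.get_object, head.tree, 'README')

blob = repo.get_object(sha)
print oct(mode)
print blob.data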