def test_simple_local(self):
    """Clone a local repository with ``checkout=False`` and verify the clone.

    Builds a three-commit graph in ``self.repo``, points a branch and a tag
    at the newest commit, clones into a fresh temporary directory and then
    checks that:

    * the clone lives at the requested path,
    * its index is empty and no tracked file was written to the work tree
      (because ``checkout=False``),
    * the tag was copied, and
    * the ``origin`` remote URL and fetch refspec were written to the config.
    """
    f1_1 = make_object(Blob, data=b'f1')
    commit_spec = [[1], [2, 1], [3, 1, 2]]
    trees = {
        1: [(b'f1', f1_1), (b'f2', f1_1)],
        2: [(b'f1', f1_1), (b'f2', f1_1)],
        3: [(b'f1', f1_1), (b'f2', f1_1)],
    }

    _c1, _c2, c3 = build_commit_graph(
        self.repo.object_store, commit_spec, trees)
    self.repo.refs[b"refs/heads/master"] = c3.id
    self.repo.refs[b"refs/tags/foo"] = c3.id

    target_path = tempfile.mkdtemp()
    errstream = BytesIO()
    self.addCleanup(shutil.rmtree, target_path)

    r = porcelain.clone(self.repo.path, target_path,
                        checkout=False, errstream=errstream)
    # Cleanups run in reverse order, so the repositories are closed before
    # the temporary directory is removed.
    self.addCleanup(r.close)
    self.assertEqual(r.path, target_path)

    target_repo = Repo(target_path)
    self.addCleanup(target_repo.close)
    self.assertEqual(0, len(target_repo.open_index()))
    self.assertEqual(c3.id, target_repo.refs[b'refs/tags/foo'])

    # target_path is a str, so os.listdir() yields str names; comparing
    # against bytes names could never fail.
    worktree_entries = os.listdir(target_path)
    self.assertNotIn('f1', worktree_entries)
    self.assertNotIn('f2', worktree_entries)

    c = r.get_config()
    encoded_path = self.repo.path
    if not isinstance(encoded_path, bytes):
        encoded_path = encoded_path.encode('utf-8')
    self.assertEqual(encoded_path, c.get((b'remote', b'origin'), b'url'))
    self.assertEqual(
        b'+refs/heads/*:refs/remotes/origin/*',
        c.get((b'remote', b'origin'), b'fetch'))
def dulwichCommit(self, filePath, fullPath, kind):
    """Stage ``filePath`` in the autogit repository and commit if needed.

    :param filePath: path to stage; it is converted with ``str()`` first.
    :param fullPath: not used by this method.
    :param kind: label interpolated into the commit message.
    :return: the value returned by ``Repo.do_commit`` when a commit was
        made, otherwise ``None``.
    """
    git = Repo(AUTOGIT_PATH)
    try:
        git.stage([str(filePath)])
        index = git.open_index()

        try:
            committer = git._get_user_identity()
        except ValueError:
            # No usable identity: fall back to a fixed committer name.
            committer = "autogit"

        try:
            git.head()
        except KeyError:
            # There is no HEAD yet, so this is the very first commit and
            # there is nothing to compare the index against.
            needs_commit = True
        else:
            changes = list(
                tree_changes(git, index.commit(git.object_store),
                             git['HEAD'].tree))
            needs_commit = bool(changes)

        if not needs_commit:
            return None
        return git.do_commit('%s - autogit commit (via dulwich)' % kind,
                             committer=committer)
    finally:
        # Release the repository's file handles on every exit path.
        git.close()
def _clone_submodules(cls, repo: Repo) -> None:
    """
    Clone every submodule listed in the repository's ``.gitmodules`` file.

    Each submodule is cloned through ``cls.clone`` at the revision recorded
    for its path in the repository index. Does nothing when the repository
    has no ``.gitmodules`` file.
    """
    root = Path(repo.path)
    gitmodules = root / ".gitmodules"

    if not gitmodules.exists():
        return

    url: bytes
    path: bytes

    parsed = ConfigFile.from_path(gitmodules)
    for path, url, _ in parse_submodules(parsed):  # type: ignore[no-untyped-call]
        relative = Path(path.decode("utf-8"))
        absolute = root / relative

        # Make sure the directory that will contain the submodule exists.
        parent_dir = absolute.parent
        parent_dir.mkdir(parents=True, exist_ok=True)

        # The index entry for the submodule path holds the pinned commit.
        with repo:
            revision = repo.open_index()[path].sha.decode("utf-8")

        # An existing path that is not already a git checkout is cleaned.
        needs_clean = (
            absolute.exists() and not absolute.joinpath(".git").is_dir()
        )

        cls.clone(
            url=url.decode("utf-8"),
            source_root=parent_dir,
            name=relative.name,
            revision=revision,
            clean=needs_clean,
        )
def test_simple_local(self):
    """Clone a local repository with ``checkout=False`` and verify the clone.

    A three-commit graph is created in ``self.repo`` with a branch and a tag
    pointing at the newest commit. After cloning into a temporary directory
    the test checks the clone's path, that its index is empty, that no
    tracked file was written to the work tree, that the tag was copied, and
    that the ``origin`` remote URL and fetch refspec are in the config.
    """
    f1_1 = make_object(Blob, data=b'f1')
    commit_spec = [[1], [2, 1], [3, 1, 2]]
    trees = {
        1: [(b'f1', f1_1), (b'f2', f1_1)],
        2: [(b'f1', f1_1), (b'f2', f1_1)],
        3: [(b'f1', f1_1), (b'f2', f1_1)],
    }

    _c1, _c2, c3 = build_commit_graph(
        self.repo.object_store, commit_spec, trees)
    self.repo.refs[b"refs/heads/master"] = c3.id
    self.repo.refs[b"refs/tags/foo"] = c3.id

    target_path = tempfile.mkdtemp()
    errstream = BytesIO()
    self.addCleanup(shutil.rmtree, target_path)

    r = porcelain.clone(self.repo.path, target_path,
                        checkout=False, errstream=errstream)
    # Close the clone before the directory is removed (cleanups run in
    # reverse registration order).
    self.addCleanup(r.close)
    self.assertEqual(r.path, target_path)

    target_repo = Repo(target_path)
    self.addCleanup(target_repo.close)
    self.assertEqual(0, len(target_repo.open_index()))
    self.assertEqual(c3.id, target_repo.refs[b'refs/tags/foo'])

    # os.listdir() on a str path returns str names, so the check must use
    # str names too; a bytes name would never be found and the assertion
    # would always pass.
    worktree_entries = os.listdir(target_path)
    self.assertNotIn('f1', worktree_entries)
    self.assertNotIn('f2', worktree_entries)

    c = r.get_config()
    encoded_path = self.repo.path
    if not isinstance(encoded_path, bytes):
        encoded_path = encoded_path.encode('utf-8')
    self.assertEqual(encoded_path, c.get((b'remote', b'origin'), b'url'))
    self.assertEqual(
        b'+refs/heads/*:refs/remotes/origin/*',
        c.get((b'remote', b'origin'), b'fetch'))
def _dulwich_status(self):
    """
    Return the list of tree changes between the index and HEAD.

    The repository is opened from ``self.config['top_dir']``; the index is
    written out as a tree and compared with the tree of the HEAD commit.
    """
    repo = Repo(self.config['top_dir'])
    index_tree = repo.open_index().commit(repo.object_store)
    head_tree = repo['HEAD'].tree
    return list(tree_changes(repo, index_tree, head_tree))
def commit(repo: Repo, msg: str) -> str:
    """Commit everything.

    Every path in the index is compared with the file in the working tree;
    paths whose content hash differs from the index entry, and paths whose
    file no longer exists, are staged in a single ``repo.stage`` call before
    the commit is created.

    :param repo: repository to commit in.
    :param msg: commit message; encoded with ``str.encode()`` before use.
    :return: whatever ``repo.do_commit`` returns.
        NOTE(review): dulwich documents this as the commit id in bytes,
        which disagrees with the ``str`` annotation - confirm with callers.
    """
    root = repo.path.encode()
    to_stage = []
    for tree_path, entry in repo.open_index().items():
        full_path = os.path.join(root, tree_path)
        try:
            file_stat = os.lstat(full_path)
        except FileNotFoundError:
            # Tracked file was deleted from the working tree; staging the
            # path lets the repository record the removal instead of
            # crashing here.
            to_stage.append(tree_path)
            continue
        blob = blob_from_path_and_stat(full_path, file_stat)
        if blob.id != entry.sha:
            to_stage.append(tree_path)

    # Stage once rather than once per file.
    if to_stage:
        repo.stage(to_stage)
    return repo.do_commit(msg.encode(), b"Source{d} ML Team <*****@*****.**>")
class GitWhoosh:
    """Full-text index over the files tracked in a git index.

    The instance is also callable with the WSGI ``(environ, start_response)``
    signature: the query string is used as the search query and the matches
    are returned as a JSON document.
    """

    def __init__(self, repos_path, index_path):
        """Open the repository and open or create the search index.

        :param repos_path: path passed to ``Repo``.
        :param index_path: directory holding the search index; a new index
            is created there when ``exists_in`` reports none.
        """
        self.repo = Repo(repos_path)
        self.index_path = index_path
        self.git_index = self.repo.open_index()
        if exists_in(self.index_path):
            self.ix = open_dir(self.index_path)
        else:
            schema = Schema(path=ID(unique=True, stored=True),
                            itime=STORED,
                            content=TEXT)
            self.ix = create_in(self.index_path, schema)

    def hook_index(self, func, path):
        """Call ``func`` with the path, content and index time of ``path``.

        ``func`` receives the keyword arguments ``path``, ``content`` and
        ``itime``; path and content are decoded as UTF-8.
        """
        entry = self.git_index[path]
        # Positions 1 and 8 of the git index entry are read as the
        # modification time and the object SHA respectively.
        mtime = entry[1]
        sha = entry[8]
        raw = self.repo[sha].as_raw_string()
        func(path=path.decode('utf-8'),
             content=raw.decode('utf-8'),
             itime=mtime)

    def index(self, regexp=None):
        """Synchronise the search index with the git index.

        :param regexp: when given, only git paths matching it (via
            ``re.search``) are added or updated.
        """
        with self.ix.searcher() as searcher:
            writer = self.ix.writer()

            # Pass 1: remember what is indexed and drop vanished paths.
            paths = {}
            for fields in searcher.all_stored_fields():
                stored_path = fields['path']
                paths[stored_path] = fields['itime']
                if stored_path not in self.git_index:
                    writer.delete_by_term('path', stored_path)

            # Pass 2: add new paths, refresh the ones that became newer.
            for path in self.git_index:
                if regexp and not re.search(regexp, path):
                    continue
                if path not in paths:
                    self.hook_index(writer.add_document, path)
                elif self.git_index[path][1] > paths[path.decode('utf-8')]:
                    self.hook_index(writer.update_document, path)

            writer.commit()

    def search(self, query):
        """Search file contents and return the matching paths.

        :param query: query string; decoded as UTF-8 before parsing.
        :return: list of dicts with the keys ``'path'`` and ``'terms'``.
        """
        parser = QueryParser('content', schema=self.ix.schema)
        parsed = parser.parse(query.decode('utf-8'))
        found_items = []
        with self.ix.searcher() as searcher:
            for hit in searcher.search(parsed, terms=True):
                terms = [term[1] for term in hit.matched_terms()]
                found_items.append({'path': hit['path'], 'terms': terms})
        return found_items

    def __call__(self, environ, start_response):
        """Answer a request: run the query string as a search, emit JSON."""
        start_response('200 OK', [('Content-Type', 'application/json')])
        query_string = environ.get('QUERY_STRING', None)
        if query_string:
            output = self.search(urllib.unquote(query_string))
        else:
            output = []
        return json.dumps(output)
def _ls_root(self, workspace=None):
    """List the blobs recorded in the index of ``self.workspace``.

    Each blob yielded by the index becomes one line whose fields are
    converted with ``str()`` and separated by tabs. The ``workspace``
    argument is not used.

    :return: a pair of the encoded listing and an empty bytes string.
    """
    from dulwich.repo import Repo

    outstream = StringIO()
    index = Repo(self.workspace.working_dir).open_index()
    outstream.writelines(
        '\t'.join(str(field) for field in blob) + '\n'
        for blob in index.iterblobs()
    )
    listing = outstream.getvalue()
    return listing.encode(), b''
def _dulwich_status(self):
    """
    Return the git status as a list of tree changes.

    Compares the tree built from the index of the repository at
    ``self.config['top_dir']`` with the tree of its HEAD commit.
    """
    top_dir = self.config['top_dir']
    repository = Repo(top_dir)
    staged = repository.open_index()
    changes = tree_changes(
        repository,
        staged.commit(repository.object_store),
        repository['HEAD'].tree,
    )
    return [change for change in changes]
def do_import(commits, repo_loc, overwrite=True, author_="Règlement général <*****@*****.**>"):
    """Import a sequence of dated versions as git commits, one tag each.

    :param commits: sequence of commit records. ``commit[0]`` is the naive
        datetime of the version, ``commit[1]`` its items and, when the
        record has exactly three elements, ``commit[2]`` is passed to
        ``create_tree`` as ``main``.
    :param repo_loc: directory of the target repository. It is created when
        missing; when it exists it is wiped and re-initialised if
        ``overwrite`` is true, otherwise opened as is.
    :param overwrite: whether an existing ``repo_loc`` is deleted first.
    :param author_: identity used as committer and tagger.
    """
    if exists(repo_loc) and not overwrite:
        repo = Repo(repo_loc)
    else:
        if exists(repo_loc):
            print("Deleting existing output directory: %s" % repo_loc)
            shutil.rmtree(repo_loc)
        os.mkdir(repo_loc)
        repo = Repo.init(repo_loc)

    try:
        author = bytes(author_, "UTF-8")
        fs_encoding = sys.getfilesystemencoding()

        print("Importing %d commit(s)" % len(commits))
        for i, commit in enumerate(commits):
            date = commit[0]
            print("Commit %d dated %s, %d items" % (i, str(date), len(commit[1])))
            print(" authored by %s" % author_)

            paths_added, paths_removed = create_tree(
                commit, repo_loc, readme=False,
                main=commit[2] if len(commit) == 3 else {})
            repo.stage([path.encode(fs_encoding) for path in set(paths_added)])

            index = repo.open_index()
            print(" Removing %d files" % len(paths_removed))
            for p in paths_removed:
                del index[p.encode(fs_encoding)]
            index.write()

            # Interpret the naive date as Paris time for BOTH the timestamp
            # and the offset. A bare date.timestamp() would use the zone of
            # the machine running the import instead.
            localized = TZ_PARIS.localize(date)
            utc_offset = int(localized.utcoffset().total_seconds())

            repo.do_commit(
                bytes("Version du %s" % date.strftime(FMT), "UTF-8"),
                committer=author,
                commit_timestamp=localized.timestamp(),
                commit_timezone=utc_offset)

            # Create an annotated tag, named after the date, on the commit
            # that was just made.
            tag_name = bytes(date.strftime(ISO_8601), "UTF-8")
            target = parse_object(repo, "HEAD")
            tag = Tag()
            tag.tagger = author
            tag.name = tag_name
            tag.message = b''
            tag.object = (type(target), target.id)
            # NOTE(review): the tag time is the moment of the import, not
            # the version date - kept as in the original; confirm intent.
            tag.tag_time = int(time.time())
            tag.tag_timezone = utc_offset
            repo.object_store.add_object(tag)
            repo.refs[b'refs/tags/' + tag_name] = tag.id
    finally:
        # Close the repository even when an import step fails.
        repo.close()
def main(args, hear, talk, complain):
    """Reset boring changes

    See doc-string of this file for outline.

    Required arguments - args, hear, talk and complain - should,
    respectively, be (or behave as, e.g. if mocking to test) sys.argv,
    sys.stdin, sys.stdout and sys.stderr. ``hear`` is not read here.

    The only command-line option supported (in args) is a '--disclaim'
    flag, to treat as boring all changes in files with the standard
    'We mean it' disclaimer; it is usual to pass this flag.

    Every path whose change is restored or discarded is written to talk,
    one per line. Returns 0 on success and 1 when an IOError occurs.
    """
    if '--disclaim' in args:
        ignore = Scanner.disclaimed
    else:
        def ignore(path, warn):
            return False

    # We're in the root directory of the module:
    repo = Repo('.')
    store = repo.object_store
    index = repo.open_index()
    renamer = RenameDetector(store)

    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different. Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        head_tree = store[repo.refs['HEAD']].tree
        staged_tree = index.commit(store)
        for kind, old, new in renamer.changes_with_renames(head_tree,
                                                           staged_tree):
            # Each of old, new is a named triple of .path, .mode and
            # .sha; kind is the change type, in ('add', 'modify',
            # 'delete', 'rename', 'copy', 'unchanged'), although we
            # shouldn't get the last. If new.path is None, file was
            # removed, not renamed; otherwise, if new has a
            # disclaimer, it's private despite its name and path.
            if new.path and not ignore(new.path, complain.write):
                assert kind not in ('unchanged', 'delete'), kind
                if kind != 'add':
                    # Filter out boring changes
                    selector = Selector(store, new.sha, old.sha,
                                        old.mode or new.mode)
                    index[new.path] = selector.refine()
                continue

            if not old.path:
                # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]
                talk.write(new.path + '\n')
                continue

            # disclaimed or removed: ignore by restoring
            assert new.path or kind == 'delete', (kind, new.path)
            index[old.path] = Selector.restore(store[old.sha], old.mode)
            talk.write(old.path + '\n')
            if new.path and new.path != old.path:
                talk.write(new.path + '\n')

        index.write()
    except IOError:
        # ... and any other errors that just mean failure.
        return 1

    return 0
def main(args, hear, talk, complain):
    """Rewrite the index so that boring or disclaimed changes are dropped.

    With '--disclaim' in args, ``Scanner.disclaimed`` decides which new
    paths are ignored (it is given ``complain.write``); otherwise nothing
    is ignored. ``hear`` and ``talk`` are not used here.

    Returns 0 on success and 1 when an IOError occurs.
    """
    # Future: we may want to parse more args, query the user or wrap
    # talk, complain for verbosity control.
    never_ignore = lambda p, w: False
    ignore = Scanner.disclaimed if '--disclaim' in args else never_ignore

    # We're in the root directory of the module:
    repo = Repo('.')
    store, index = repo.object_store, repo.open_index()
    renamer = RenameDetector(store)

    try:
        # TODO: demand stronger similarity for a copy than for rename;
        # our huge copyright headers (and common boilerplate) make
        # small header files look very similar despite their real
        # content all being quite different. Probably need to hack
        # dulwich (find_copies_harder is off by default anyway).
        changes = renamer.changes_with_renames(
            store[repo.refs['HEAD']].tree, index.commit(store))
        for kind, old, new in changes:
            # old and new are named triples of .path, .mode and .sha;
            # kind is one of 'add', 'modify', 'delete', 'rename', 'copy'
            # or 'unchanged' (the last is not expected). A new.path of
            # None means the file was removed, not renamed; a disclaimed
            # new file is private despite its name and path.
            wanted = new.path and not ignore(new.path, complain.write)
            if wanted:
                assert kind not in ('unchanged', 'delete'), kind
                if kind != 'add':
                    # Filter out boring changes
                    mode = old.mode or new.mode
                    index[new.path] = Selector(
                        store, new.sha, old.sha, mode).refine()
            elif old.path:
                # disclaimed or removed: ignore by restoring
                assert new.path or kind == 'delete', (kind, new.path)
                index[old.path] = Selector.restore(store[old.sha], old.mode)
            else:
                # new but disclaimed: ignore by discarding
                assert kind == 'add' and new.path, (kind, new.path)
                del index[new.path]
        index.write()
    except IOError:
        # ... and any other errors that just mean failure.
        return 1
    return 0
def initdGit(path): ''' Takes current working directory (within repository) as argument. initdGit builds dgit path and files, determines whether repository needs dgit initialization,whether directory is already a git repository, adds space for user foaf URI in local config file, and calls function to document repository in .dgit/provenance.ttl ''' os.chdir(path) needsInit = False try: #if error is not raised, repository needs dgit init dgitpath = ".dgit" dgitProv = dgitpath + "/provenance.ttl" os.mkdir(dgitpath) os.system('touch %s'%dgitProv) needsInit = True except: print '''Repository already initialized. To reinitialize Git, please use git init command''' if needsInit: repoExists=False try: #if error is not raised, git repository alreaded inited repo = Repo(path) repoExists=True except: initNewRepo(path,dgitProv) #calls function to initialize git if repoExists: os.system('git config user.foaf "" ') #create space in local config for foaf URI if not list(repo.open_index()): newRepo=True #if no files are tracked in repo, assumes bare repository else: newRepo=False writeProv(path,newRepo) #calls function to write provenance repo.stage([dgitProv]) #stages provenance (via dulwich) print '''Repository converted to dgit repository,
def dulwichCommit(self, filePath, fullPath, kind):
    """Stage ``filePath`` in the autogit repository and commit if needed.

    A commit is made when the repository has no HEAD yet, or when the tree
    built from the index differs from the tree of the HEAD commit.

    :param filePath: path to stage; it is converted with ``str()`` first.
    :param fullPath: not used by this method.
    :param kind: label interpolated into the commit message.
    :return: the value returned by ``Repo.do_commit`` when a commit was
        made, otherwise ``None``.
    """
    git = Repo(AUTOGIT_PATH)
    try:
        git.stage([str(filePath)])
        index = git.open_index()

        try:
            committer = git._get_user_identity()
        except ValueError:
            # No usable identity: fall back to a fixed committer name.
            committer = "autogit"

        try:
            git.head()
        except KeyError:
            # No HEAD yet: this will be the first commit, nothing to diff.
            has_changes = True
        else:
            changes = list(
                tree_changes(git, index.commit(git.object_store),
                             git['HEAD'].tree))
            has_changes = bool(changes)

        if has_changes:
            return git.do_commit(
                '%s - autogit commit (via dulwich)' % kind,
                committer=committer)
        return None
    finally:
        # Release the repository's file handles on every exit path.
        git.close()
class Gittle(object): """All paths used in Gittle external methods must be paths relative to the git repository """ DEFAULT_COMMIT = 'HEAD' DEFAULT_BRANCH = 'master' DEFAULT_REMOTE = 'origin' DEFAULT_MESSAGE = '**No Message**' DEFAULT_USER_INFO = { 'name': None, 'email': None, } DIFF_FUNCTIONS = { 'classic': utils.git.classic_tree_diff, 'dict': utils.git.dict_tree_diff, 'changes': utils.git.dict_tree_diff } DEFAULT_DIFF_TYPE = 'dict' HIDDEN_REGEXES = [ # Hide git directory r'.*\/\.git\/.*', ] # References REFS_BRANCHES = 'refs/heads/' REFS_REMOTES = 'refs/remotes/' REFS_TAGS = 'refs/tags/' # Name pattern truths # Used for detecting if files are : # - deleted # - added # - changed PATTERN_ADDED = (False, True) PATTERN_REMOVED = (True, False) PATTERN_MODIFIED = (True, True) # Permissions MODE_DIRECTORY = 0o40000 # Used to tell if a tree entry is a directory # Tree depth MAX_TREE_DEPTH = 1000 # Acceptable Root paths ROOT_PATHS = (os.path.curdir, os.path.sep) def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs): if isinstance(repo_or_path, DulwichRepo): self.repo = repo_or_path elif isinstance(repo_or_path, Gittle): self.repo = DulwichRepo(repo_or_path.path) elif isinstance(repo_or_path, basestring): path = os.path.abspath(repo_or_path) self.repo = DulwichRepo(path) else: logging.warning('Repo is of type %s' % type(repo_or_path)) raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path') # Set path self.path = self.repo.path # The remote url self.origin_uri = origin_uri # Report client activty self._report_activity = report_activity # Build ignore filter self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES) self.hidden_regexes.extend(self._get_ignore_regexes()) self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes) self.filters = [ self.ignore_filter, ] # Get authenticator if auth: self.authenticator = auth else: self.auth(*args, **kwargs) def 
report_activity(self, *args, **kwargs): if not self._report_activity: return return self._report_activity(*args, **kwargs) def _format_author(self, name, email): return "%s <%s>" % (name, email) def _format_userinfo(self, userinfo): name = userinfo.get('name') email = userinfo.get('email') if name and email: return self._format_author(name, email) return None def _format_ref(self, base, extra): return ''.join([base, extra]) def _format_ref_branch(self, branch_name): return self._format_ref(self.REFS_BRANCHES, branch_name) def _format_ref_remote(self, remote_name): return self._format_ref(self.REFS_REMOTES, remote_name) def _format_ref_tag(self, tag_name): return self._format_ref(self.REFS_TAGS, tag_name) @property def head(self): """Return SHA of the current HEAD """ return self.repo.head() @property def is_bare(self): """Bare repositories have no working directories or indexes """ return self.repo.bare @property def is_working(self): return not(self.is_bare) def has_index(self): """Opposite of is_bare """ return self.repo.has_index() @property def has_commits(self): """ If the repository has no HEAD we consider that is has no commits """ try: self.repo.head() except KeyError: return False return True def ref_walker(self, ref=None): """ Very simple, basic walker """ ref = ref or 'HEAD' sha = self._commit_sha(ref) for entry in self.repo.get_walker(sha): yield entry.commit def branch_walker(self, branch): branch = branch or self.active_branch ref = self._format_ref_branch(branch) return self.ref_walker(ref) def commit_info(self, start=0, end=None, branch=None): """Return a generator of commits with all their attached information """ if not self.has_commits: return [] commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)] if not end: return commits return commits[start:end] @funky.uniquify def recent_contributors(self, n=None, branch=None): n = n or 10 return funky.pluck(self.commit_info(end=n, branch=branch), 'author') @property def 
commit_count(self): try: return len(self.ref_walker()) except KeyError: return 0 def commits(self): """Return a list of SHAs for all the concerned commits """ return [commit['sha'] for commit in self.commit_info()] @property def git_dir(self): return self.repo.controldir() def auth(self, *args, **kwargs): self.authenticator = GittleAuth(*args, **kwargs) return self.authenticator # Generate a branch selector (used for pushing) def _wants_branch(self, branch_name=None): branch_name = branch_name or self.active_branch refs_key = self._format_ref_branch(branch_name) sha = self.branches[branch_name] def wants_func(old): refs_key = self._format_ref_branch(branch_name) return { refs_key: sha } return wants_func def _get_ignore_regexes(self): gitignore_filename = os.path.join(self.path, '.gitignore') if not os.path.exists(gitignore_filename): return [] lines = open(gitignore_filename).readlines() globers = map(lambda line: line.rstrip(), lines) return utils.paths.globers_to_regex(globers) # Get the absolute path for a file in the git repo def abspath(self, repo_file): return os.path.abspath( os.path.join(self.path, repo_file) ) # Get the relative path from the absolute path def relpath(self, abspath): return os.path.relpath(abspath, self.path) @property def last_commit(self): return self[self.repo.head()] @property def index(self): return self.repo.open_index() @classmethod def init(cls, path, bare=None, *args, **kwargs): """Initialize a repository""" mkdir_safe(path) # Constructor to use if bare: constructor = DulwichRepo.init_bare else: constructor = DulwichRepo.init # Create dulwich repo repo = constructor(path) # Create Gittle repo return cls(repo, *args, **kwargs) @classmethod def init_bare(cls, *args, **kwargs): kwargs.setdefault('bare', True) return cls.init(*args, **kwargs) @classmethod def is_repo(cls, path): """Returns True if path is a git repository, False if it is not""" try: repo = Gittle(path) except NotGitRepository: return False else: return True def 
get_client(self, origin_uri=None, **kwargs): # Get the remote URL origin_uri = origin_uri or self.origin_uri # Fail if inexistant if not origin_uri: raise InvalidRemoteUrl() client_kwargs = {} auth_kwargs = self.authenticator.kwargs() client_kwargs.update(auth_kwargs) client_kwargs.update(kwargs) client_kwargs.update({ 'report_activity': self.report_activity }) client, remote_path = get_transport_and_path(origin_uri, **client_kwargs) return client, remote_path def push_to(self, origin_uri, branch_name=None, progress=None): selector = self._wants_branch(branch_name=branch_name) client, remote_path = self.get_client(origin_uri) return client.send_pack( remote_path, selector, self.repo.object_store.generate_pack_contents, progress=progress ) # Like: git push def push(self, origin_uri=None, branch_name=None, progress=None): return self.push_to(origin_uri, branch_name, progress) # Not recommended at ALL ... !!! def dirty_pull_from(self, origin_uri, branch_name=None): # Remove all previously existing data rmtree(self.path) mkdir_safe(self.path) self.repo = DulwichRepo.init(self.path) # Fetch brand new copy from remote return self.pull_from(origin_uri, branch_name) def pull_from(self, origin_uri, branch_name=None): return self.fetch(origin_uri) # Like: git pull def pull(self, origin_uri=None, branch_name=None): return self.pull_from(origin_uri, branch_name) def fetch_remote(self, origin_uri=None): # Get client client, remote_path = self.get_client(origin_uri=origin_uri) # Fetch data from remote repository remote_refs = client.fetch(remote_path, self.repo) return remote_refs def _setup_fetched_refs(self, refs, origin, bare): remote_tags = utils.git.subrefs(refs, 'refs/tags') remote_heads = utils.git.subrefs(refs, 'refs/heads') # Filter refs clean_remote_tags = utils.git.clean_refs(remote_tags) clean_remote_heads = utils.git.clean_refs(remote_heads) # Base of new refs heads_base = 'refs/remotes/' + origin if bare: heads_base = 'refs/heads' # Import branches 
self.import_refs( heads_base, clean_remote_heads ) # Import tags self.import_refs( 'refs/tags', clean_remote_tags ) # Update HEAD for k, v in utils.git.clean_refs(refs).items(): self[k] = v def fetch(self, origin_uri=None, bare=None, origin=None): bare = bare or False origin = origin or self.DEFAULT_REMOTE # Remote refs remote_refs = self.fetch_remote(origin_uri) # Update head # Hit repo because head doesn't yet exist so # print("REFS = %s" % remote_refs) # If no refs (empty repository() if not remote_refs: return # Update refs (branches, tags, HEAD) self._setup_fetched_refs(remote_refs, origin, bare) # Checkout working directories if not bare and self.has_commits: self.checkout_all() else: self.update_server_info() @classmethod def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs): """Clone a remote repository""" mkdir_safe(local_path) # Initialize the local repository if bare: local_repo = cls.init_bare(local_path) else: local_repo = cls.init(local_path) repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs) repo.fetch(bare=bare) # Add origin repo.add_remote('origin', origin_uri) return repo @classmethod def clone_bare(cls, *args, **kwargs): """Same as .clone except clones to a bare repository by default """ kwargs.setdefault('bare', True) return cls.clone(*args, **kwargs) def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs): if not tree: # If no tree then stage files modified_files = files or self.modified_files logging.info("STAGING : %s" % modified_files) self.repo.stage(modified_files) # Messages message = message or self.DEFAULT_MESSAGE author_msg = self._format_userinfo(author) committer_msg = self._format_userinfo(committer) return self.repo.do_commit( message=message, author=author_msg, committer=committer_msg, encoding='UTF-8', tree=tree, *args, **kwargs ) def _tree_from_structure(self, structure): # TODO : Support directories tree = Tree() for 
file_info in structure: # str only try: data = file_info['data'].encode('ascii') name = file_info['name'].encode('ascii') mode = file_info['mode'] except: # Skip file on encoding errors continue blob = Blob() blob.data = data # Store file's contents self.repo.object_store.add_object(blob) # Add blob entry tree.add( name, mode, blob.id ) # Store tree self.repo.object_store.add_object(tree) return tree.id # Like: git commmit -a def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs): user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, files=files, *args, **kwargs ) def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs): """Main use is to do commits directly to bare repositories For example doing a first Initial Commit so the repo can be cloned and worked on right away """ if not structure: return tree = self._tree_from_structure(structure) user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, tree=tree, *args, **kwargs ) # Push all local commits # and pull all remote commits def sync(self, origin_uri=None): self.push(origin_uri) return self.pull(origin_uri) def lookup_entry(self, relpath, trackable_files=set()): if not relpath in trackable_files: raise KeyError abspath = self.abspath(relpath) with open(abspath, 'rb') as git_file: data = git_file.read() s = sha1() s.update("blob %u\0" % len(data)) s.update(data) return (s.hexdigest(), os.stat(abspath).st_mode) @property @funky.transform(set) def tracked_files(self): return list(self.index) @property @funky.transform(set) def raw_files(self): return utils.paths.subpaths(self.path) @property @funky.transform(set) def ignored_files(self): return utils.paths.subpaths(self.path, filters=self.filters) @property @funky.transform(set) def trackable_files(self): return self.raw_files - self.ignored_files @property 
@funky.transform(set) def untracked_files(self): return self.trackable_files - self.tracked_files """ @property @funky.transform(set) def modified_staged_files(self): "Checks if the file has changed since last commit" timestamp = self.last_commit.commit_time index = self.index return [ f for f in self.tracked_files if index[f][1][0] > timestamp ] """ # Return a list of tuples # representing the changed elements in the git tree def _changed_entries(self, ref=None): ref = ref or self.DEFAULT_COMMIT if not self.has_commits: return [] obj_sto = self.repo.object_store tree_id = self[ref].tree names = self.trackable_files lookup_func = partial(self.lookup_entry, trackable_files=names) # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...] tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False) return list(tree_diff) @funky.transform(set) def _changed_entries_by_pattern(self, pattern): changed_entries = self._changed_entries() filtered_paths = None #if the pattern is PATTERN_MODIFIED, should check the sha if self.PATTERN_MODIFIED == pattern: filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) and sha[0] == sha[1] ] else : filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) ] return filtered_paths @property @funky.transform(set) def removed_files(self): return self._changed_entries_by_pattern(self.PATTERN_REMOVED) - self.ignored_files @property @funky.transform(set) def added_files(self): return self._changed_entries_by_pattern(self.PATTERN_ADDED) - self.ignored_files @property @funky.transform(set) def modified_files(self): modified_files = self._changed_entries_by_pattern(self.PATTERN_MODIFIED) - self.ignored_files return modified_files @property @funky.transform(set) def modified_unstaged_files(self): timestamp = 
self.last_commit.commit_time return [ f for f in self.tracked_files if os.stat(self.abspath(f)).st_mtime > timestamp ] @property def pending_files(self): """ Returns a list of all files that could be possibly staged """ # Union of both return self.modified_files | self.added_files | self.removed_files @property def pending_files_by_state(self): files = { 'modified': self.modified_files, 'added': self.added_files, 'removed': self.removed_files } # "Flip" the dictionary return { path: state for state, paths in files.items() for path in paths } """ @property @funky.transform(set) def modified_files(self): return self.modified_staged_files | self.modified_unstaged_files """ # Like: git add @funky.arglist_method def stage(self, files): return self.repo.stage(files) def add(self, *args, **kwargs): return self.stage(*args, **kwargs) # Like: git rm @funky.arglist_method def rm(self, files, force=False): index = self.index index_files = filter(lambda f: f in index, files) for f in index_files: del self.index[f] return index.write() def mv_fs(self, file_pair): old_name, new_name = file_pair os.rename(old_name, new_name) # Like: git mv @funky.arglist_method def mv(self, files_pair): index = self.index files_in_index = filter(lambda f: f[0] in index, files_pair) map(self.mv_fs, files_in_index) old_files = map(funky.first, files_in_index) new_files = map(funky.last, files_in_index) self.add(new_files) self.rm(old_files) self.add(old_files) return @working_only def _checkout_tree(self, tree): return build_index_from_tree( self.repo.path, self.repo.index_path(), self.repo.object_store, tree ) def checkout_all(self, commit_sha=None): commit_sha = commit_sha or self.head commit_tree = self._commit_tree(commit_sha) # Rebuild index from the current tree return self._checkout_tree(commit_tree) def checkout(self, ref): """Checkout a given ref or SHA """ self.repo.refs.set_symbolic_ref('HEAD', ref) commit_tree = self._commit_tree(ref) # Rebuild index from the current tree return 
self._checkout_tree(commit_tree) @funky.arglist_method def reset(self, files, commit='HEAD'): pass def rm_all(self): # if we go at the index via the property, it is reconstructed # each time and therefore clear() doesn't have the desired effect, # therefore, we cache it in a variable and use that. i = self.index i.clear() return i.write() def _to_commit(self, commit_obj): """Allows methods to accept both SHA's or dulwich Commit objects as arguments """ if isinstance(commit_obj, basestring): return self.repo[commit_obj] return commit_obj def _commit_sha(self, commit_obj): """Extracts a Dulwich commits SHA """ if utils.git.is_sha(commit_obj): return commit_obj elif isinstance(commit_obj, basestring): # Can't use self[commit_obj] to avoid infinite recursion commit_obj = self.repo[self.dwim_reference(commit_obj)] return commit_obj.id def dwim_reference(self, ref): """Dwim resolves a short reference to a full reference """ # Formats of refs we want to try in order formats = [ "%s", "refs/%s", "refs/tags/%s", "refs/heads/%s", "refs/remotes/%s", "refs/remotes/%s/HEAD", ] for f in formats: try: fullref = f % ref if not fullref in self.repo: continue return fullref except: continue raise Exception("Could not resolve ref") def blob_data(self, sha): """Return a blobs content for a given SHA """ return self[sha].data # Get the nth parent back for a given commit def get_parent_commit(self, commit, n=None): """ Recursively gets the nth parent for a given commit Warning: Remember that parents aren't the previous commits """ if n is None: n = 1 commit = self._to_commit(commit) parents = commit.parents if n <= 0 or not parents: # Return a SHA return self._commit_sha(commit) parent_sha = parents[0] parent = self[parent_sha] # Recur return self.get_parent_commit(parent, n - 1) def get_previous_commit(self, commit_ref, n=None): commit_sha = self._parse_reference(commit_ref) n = n or 1 commits = self.commits() return funky.next(commits, commit_sha, n=n, default=commit_sha) def 
_parse_reference(self, ref_string): # COMMIT_REF~x if '~' in ref_string: ref, count = ref_string.split('~') count = int(count) commit_sha = self._commit_sha(ref) return self.get_previous_commit(commit_sha, count) return self._commit_sha(ref_string) def _commit_tree(self, commit_sha): """Return the tree object for a given commit """ return self[commit_sha].tree def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True): diff_type = diff_type or self.DEFAULT_DIFF_TYPE diff_func = self.DIFF_FUNCTIONS[diff_type] if not compare_to: compare_to = self.get_previous_commit(commit_sha) return self._diff_between(compare_to, commit_sha, diff_function=diff_func) def diff_working(self, ref=None, filter_binary=True): """Diff between the current working directory and the HEAD """ return utils.git.diff_changes_paths( self.repo.object_store, self.path, self._changed_entries(ref=ref), filter_binary=filter_binary ) def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None): """Returns a dict of the following Format : { "directory/filename.txt": { 'name': 'filename.txt', 'path': "directory/filename.txt", "sha": "xxxxxxxxxxxxxxxxxxxx", "data": "blablabla", "mode": 0xxxxx", }, ... 
} """ # Default values context = {} is_tree = is_tree or False parent_path = parent_path or '' if is_tree: tree = self[commit_sha] else: tree = self[self._commit_tree(commit_sha)] for entry in tree.items(): # Check if entry is a directory if entry.mode == self.MODE_DIRECTORY: context.update( self.get_commit_files(entry.sha, parent_path=os.path.join(parent_path, entry.path), is_tree=True, paths=paths) ) continue subpath = os.path.join(parent_path, entry.path) # Only add the files we want if not(paths is None or subpath in paths): continue # Add file entry context[subpath] = { 'name': entry.path, 'path': subpath, 'mode': entry.mode, 'sha': entry.sha, 'data': self.blob_data(entry.sha), } return context def file_versions(self, path): """Returns all commits where given file was modified """ versions = [] commits_info = self.commit_info() seen_shas = set() for commit in commits_info: try: files = self.get_commit_files(commit['sha'], paths=[path]) file_path, file_data = files.items()[0] except IndexError: continue file_sha = file_data['sha'] if file_sha in seen_shas: continue else: seen_shas.add(file_sha) # Add file info commit['file'] = file_data versions.append(file_data) return versions def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True): """Internal method for getting a diff between two commits Please use .diff method unless you have very specific needs """ # If commit is first commit (new_commit_sha == old_commit_sha) # then compare to an empty tree if new_commit_sha == old_commit_sha: old_tree = Tree() else: old_tree = self._commit_tree(old_commit_sha) new_tree = self._commit_tree(new_commit_sha) return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary) def changes(self, *args, **kwargs): """ List of changes between two SHAs Returns a list of lists of tuples : [ [ (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) ], ... 
] """ kwargs['diff_type'] = 'changes' return self.diff(*args, **kwargs) def changes_count(self, *args, **kwargs): return len(self.changes(*args, **kwargs)) def _refs_by_pattern(self, pattern): refs = self.refs def item_filter(key_value): """Filter only concered refs""" key, value = key_value return key.startswith(pattern) def item_map(key_value): """Rewrite keys""" key, value = key_value new_key = key[len(pattern):] return (new_key, value) return dict( map(item_map, filter( item_filter, refs.items() ) ) ) @property def refs(self): return self.repo.get_refs() def set_refs(refs_dict): for k, v in refs_dict.items(): self.repo[k] = v def import_refs(self, base, other): return self.repo.refs.import_refs(base, other) @property def branches(self): return self._refs_by_pattern(self.REFS_BRANCHES) @property def active_branch(self): """Returns the name of the active branch, or None, if HEAD is detached """ x = self.repo.refs.read_ref('HEAD') if not x.startswith(SYMREF): return None else: symref = x[len(SYMREF):] if not symref.startswith(self.REFS_BRANCHES): return None else: return symref[len(self.REFS_BRANCHES):] @property def active_sha(self): """Deprecated equivalent to head property """ return self.head @property def remote_branches(self): return self._refs_by_pattern(self.REFS_REMOTES) @property def tags(self): return self._refs_by_pattern(self.REFS_TAGS) @property def remotes(self): """ Dict of remotes { 'origin': 'http://friendco.de/some_user/repo.git', ... 
} """ config = self.repo.get_config() return { keys[1]: values['url'] for keys, values in config.items() if keys[0] == 'remote' } def add_remote(self, remote_name, remote_url): # Get repo's config config = self.repo.get_config() # Add new entries for remote config.set(('remote', remote_name), 'url', remote_url) config.set(('remote', remote_name), 'fetch', "+refs/heads/*:refs/remotes/%s/*" % remote_name) # Write to disk config.write_to_path() return remote_name def add_ref(self, new_ref, old_ref): self.repo.refs[new_ref] = old_ref self.update_server_info() def remove_ref(self, ref_name): # Returns False if ref doesn't exist if not ref_name in self.repo.refs: return False del self.repo.refs[ref_name] self.update_server_info() return True def create_branch(self, base_branch, new_branch, tracking=None): """Try creating a new branch which tracks the given remote if such a branch does not exist then branch off a local branch """ # The remote to track tracking = self.DEFAULT_REMOTE # Already exists if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) # Get information about remote_branch remote_branch = os.path.sep.join([tracking, base_branch]) # Fork Local if base_branch in self.branches: base_ref = self._format_ref_branch(base_branch) # Fork remote elif remote_branch in self.remote_branches: base_ref = self._format_ref_remote(remote_branch) # TODO : track else: raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)) # Reference of new branch new_ref = self._format_ref_branch(new_branch) # Copy reference to create branch self.add_ref(new_ref, base_ref) return new_ref def create_orphan_branch(self, new_branch, empty_index=None): """ Create a new branch with no commits in it. Technically, just points HEAD to a non-existent branch. The actual branch will only be created if something is committed. 
This is equivalent to: git checkout --orphan <new_branch>, Unless empty_index is set to True, in which case the index will be emptied along with the file-tree (which is always emptied). Against a clean working tree, this is equivalent to: git checkout --orphan <new_branch> git reset --merge """ if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) new_ref = self._format_ref_branch(new_branch) self.repo.refs.set_symbolic_ref('HEAD', new_ref) if self.is_working: if empty_index: self.rm_all() self.clean_working() return new_ref def remove_branch(self, branch_name): ref = self._format_ref_branch(branch_name) return self.remove_ref(ref) def switch_branch(self, branch_name, tracking=None, create=None): """Changes the current branch """ if create is None: create = True # Check if branch exists if not branch_name in self.branches: self.create_branch(branch_name, branch_name, tracking=tracking) # Get branch reference branch_ref = self._format_ref_branch(branch_name) # Change main branch self.repo.refs.set_symbolic_ref('HEAD', branch_ref) if self.is_working: # Remove all files self.clean_working() # Add files for the current branch self.checkout_all() def create_tag(self, tag_name, target): ref = self._format_ref_tag(tag_name) return self.add_ref(ref, self._parse_reference(target)) def remove_tag(self, tag_name): ref = self._format_ref_tag(tag_name) return self.remove_ref(ref) def clean(self, force=None, directories=None): untracked_files = self.untracked_files map(os.remove, untracked_files) return untracked_files def clean_working(self): """Purges all the working (removes everything except .git) used by checkout_all to get clean branch switching """ return self.clean() def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None): tree = self[tree_sha] structure = {} if depth is None: depth = self.MAX_TREE_DEPTH elif depth == 0: return structure for entry in tree.items(): # tree if entry.mode == self.MODE_DIRECTORY: # Recur 
structure[entry.path] = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha) # commit else: structure[entry.path] = entry.sha structure['.'] = tree_sha structure['..'] = parent_sha or tree_sha return structure def _get_fs_structure_by_path(self, tree_sha, path): parts = path.split(os.path.sep) depth = len(parts) + 1 structure = self._get_fs_structure(tree_sha, depth=depth) return funky.subkey(structure, parts) def commit_ls(self, ref, subpath=None): """List a "directory" for a given commit using the tree of that commit """ tree_sha = self._commit_tree(ref) # Root path if subpath in self.ROOT_PATHS or not subpath: return self._get_fs_structure(tree_sha, depth=1) # Any other path return self._get_fs_structure_by_path(tree_sha, subpath) def commit_file(self, ref, path): """Return info on a given file for a given commit """ name, info = self.get_commit_files(ref, paths=[path]).items()[0] return info def commit_tree(self, ref, *args, **kwargs): tree_sha = self._commit_tree(ref) return self._get_fs_structure(tree_sha, *args, **kwargs) def update_server_info(self): if not self.is_bare: return update_server_info(self.repo) def _is_fast_forward(self): pass def _merge_fast_forward(self): pass def __hash__(self): """This is required otherwise the memoize function will just mess it up """ return hash(self.path) def __getitem__(self, key): try: sha = self._parse_reference(key) except: raise KeyError(key) return self.repo[sha] def __setitem__(self, key, value): try: key = self.dwim_reference(key) except: pass self.repo[key] = value def __contains__(self, key): try: key = self.dwim_reference(key) except: pass return key in self.repo def __delitem__(self, key): try: key = self.dwim_reference(key) except: raise KeyError(key) self.remove_ref(key) # Alias to clone_bare fork = clone_bare log = commit_info diff_count = changes_count contributors = recent_contributors
class DulwichBackend(BaseGitBackend):  # pylint:disable=abstract-method
    """Dulwich Git backend.

    Wraps a ``dulwich.repo.Repo`` and implements the subset of the Git
    backend interface that dulwich supports; unsupported operations raise
    ``NotImplementedError``.
    """

    # Dulwich progress will return messages equivalent to git CLI,
    # our pbars should just display the messages as formatted by dulwich
    BAR_FMT_NOTOTAL = "{desc}{bar:b}|{postfix[info]} [{elapsed}]"

    def __init__(  # pylint:disable=W0231
        self, root_dir=os.curdir, search_parent_directories=True
    ):
        """Open the Git repository at (or above) ``root_dir``.

        Args:
            root_dir: directory to open, or to start discovery from.
            search_parent_directories: when true, use ``Repo.discover`` so
                that a parent directory containing the repo is accepted.

        Raises:
            SCMError: if dulwich reports that no Git repository was found.
        """
        from dulwich.errors import NotGitRepository
        from dulwich.repo import Repo

        try:
            if search_parent_directories:
                self.repo = Repo.discover(start=root_dir)
            else:
                self.repo = Repo(root_dir)
        except NotGitRepository as exc:
            raise SCMError(f"{root_dir} is not a git repository") from exc

        self._submodules: Dict[str, "PathInfo"] = self._find_submodules()
        # Cache of dulwich Stash objects keyed by stash ref name.
        self._stashes: dict = {}

    def _find_submodules(self) -> Dict[str, "PathInfo"]:
        """Return dict mapping submodule names to submodule paths.

        Submodule paths will be relative to Git repo root.
        """
        from dulwich.config import ConfigFile, parse_submodules

        submodules: Dict[str, "PathInfo"] = {}
        config_path = os.path.join(self.root_dir, ".gitmodules")
        if os.path.isfile(config_path):
            config = ConfigFile.from_path(config_path)
            for path, _url, section in parse_submodules(config):
                submodules[os.fsdecode(section)] = PathInfo(os.fsdecode(path))
        return submodules

    def close(self):
        """Close the underlying dulwich repository."""
        self.repo.close()

    @property
    def root_dir(self) -> str:
        """Path of the underlying dulwich repository."""
        return self.repo.path

    @staticmethod
    def clone(
        url: str,
        to_path: str,
        rev: Optional[str] = None,
        shallow_branch: Optional[str] = None,
    ):
        raise NotImplementedError

    @property
    def dir(self) -> str:
        """Common directory of the repository, as reported by dulwich."""
        return self.repo.commondir()

    def add(self, paths: Union[str, Iterable[str]], update=False):
        """Stage ``paths`` in the index.

        Args:
            paths: a single path or an iterable of paths. Directories are
                expanded to the files they contain.
            update: when true, only paths that are already present in the
                index are staged; with no ``paths`` every index entry is
                re-staged.
        """
        from dvc.utils.fs import walk_files

        assert paths or update

        if isinstance(paths, str):
            paths = [paths]

        if update and not paths:
            self.repo.stage(list(self.repo.open_index()))
            return

        files: List[bytes] = []
        for path in paths:
            if not os.path.isabs(path) and self._submodules:
                # NOTE: If path is inside a submodule, Dulwich expects the
                # staged paths to be relative to the submodule root (not the
                # parent git repo root). We append path to root_dir here so
                # that the result of relpath(path, root_dir) is actually the
                # path relative to the submodule root.
                path_info = PathInfo(path).relative_to(self.root_dir)
                for sm_path in self._submodules.values():
                    if path_info.isin(sm_path):
                        path = os.path.join(
                            self.root_dir, path_info.relative_to(sm_path)
                        )
                        break
            if os.path.isdir(path):
                files.extend(
                    os.fsencode(relpath(fpath, self.root_dir))
                    for fpath in walk_files(path)
                )
            else:
                files.append(os.fsencode(relpath(path, self.root_dir)))

        # NOTE: this doesn't check gitignore, same as GitPythonBackend.add
        if update:
            index = self.repo.open_index()
            if os.name == "nt":
                # NOTE: we need git/unix separator to compare against index
                # paths but repo.stage() expects to be called with OS paths
                self.repo.stage(
                    [
                        fname
                        for fname in files
                        if fname.replace(b"\\", b"/") in index
                    ]
                )
            else:
                self.repo.stage([fname for fname in files if fname in index])
        else:
            self.repo.stage(files)

    def commit(self, msg: str, no_verify: bool = False):
        """Commit the staged changes with message ``msg``.

        Raises:
            SCMError: if dulwich fails to commit or rejects the configured
                user identity.
        """
        from dulwich.errors import CommitError
        from dulwich.porcelain import commit
        from dulwich.repo import InvalidUserIdentity

        try:
            commit(self.root_dir, message=msg, no_verify=no_verify)
        except CommitError as exc:
            raise SCMError("Git commit failed") from exc
        except InvalidUserIdentity as exc:
            raise SCMError(
                "Git username and email must be configured"
            ) from exc

    def checkout(
        self,
        branch: str,
        create_new: Optional[bool] = False,
        force: bool = False,
        **kwargs,
    ):
        raise NotImplementedError

    def pull(self, **kwargs):
        raise NotImplementedError

    def push(self):
        raise NotImplementedError

    def branch(self, branch: str):
        """Create a branch named ``branch``.

        Raises:
            SCMError: if dulwich's ``branch_create`` fails.
        """
        from dulwich.porcelain import Error, branch_create

        try:
            branch_create(self.root_dir, branch)
        except Error as exc:
            raise SCMError(f"Failed to create branch '{branch}'") from exc

    def tag(self, tag: str):
        raise NotImplementedError

    def untracked_files(self) -> Iterable[str]:
        """Return the untracked paths reported by ``status()``."""
        _staged, _unstaged, untracked = self.status()
        return untracked

    def is_tracked(self, path: str) -> bool:
        """Return True if ``path`` or anything beneath it is in the index."""
        rel = PathInfo(path).relative_to(self.root_dir).as_posix().encode()
        rel_dir = rel + b"/"
        # A distinct loop name avoids rebinding the ``path`` parameter.
        for indexed in self.repo.open_index():
            if indexed == rel or indexed.startswith(rel_dir):
                return True
        return False

    def is_dirty(self, untracked_files: bool = False) -> bool:
        """Return True if there are staged or unstaged changes.

        Untracked files only count when ``untracked_files`` is true.
        """
        staged, unstaged, untracked = self.status()
        return bool(staged or unstaged or (untracked_files and untracked))

    def active_branch(self) -> str:
        raise NotImplementedError

    def list_branches(self) -> Iterable[str]:
        raise NotImplementedError

    def list_tags(self) -> Iterable[str]:
        raise NotImplementedError

    def list_all_commits(self) -> Iterable[str]:
        raise NotImplementedError

    def get_tree_obj(self, rev: str, **kwargs) -> DulwichObject:
        """Return a ``DulwichObject`` for the root tree of ``rev``."""
        from dulwich.objectspec import parse_tree

        tree = parse_tree(self.repo, rev)
        return DulwichObject(self.repo, ".", stat.S_IFDIR, tree.id)

    def get_rev(self) -> str:
        """Return the value of ``HEAD``.

        Raises:
            SCMError: if ``HEAD`` does not resolve to anything.
        """
        rev = self.get_ref("HEAD")
        if rev:
            return rev
        raise SCMError("Empty git repo")

    def resolve_rev(self, rev: str) -> str:
        raise NotImplementedError

    def resolve_commit(self, rev: str) -> "GitCommit":
        raise NotImplementedError

    def _get_stash(self, ref: str):
        """Return the (cached) dulwich ``Stash`` object for ``ref``."""
        from dulwich.stash import Stash as DulwichStash

        if ref not in self._stashes:
            self._stashes[ref] = DulwichStash(self.repo, ref=os.fsencode(ref))
        return self._stashes[ref]

    @cached_property
    def ignore_manager(self):
        """Dulwich ignore filter manager built from this repository."""
        from dulwich.ignore import IgnoreFilterManager

        return IgnoreFilterManager.from_repo(self.repo)

    def is_ignored(self, path: str) -> bool:
        """Return True only if ``path`` is explicitly ignored."""
        # `is_ignored` returns `false` if excluded in `.gitignore` and
        # `None` if it's not mentioned at all. `True` if it is ignored.
        return bool(
            self.ignore_manager.is_ignored(relpath(path, self.root_dir))
        )

    def set_ref(
        self,
        name: str,
        new_ref: str,
        old_ref: Optional[str] = None,
        message: Optional[str] = None,
        symbolic: Optional[bool] = False,
    ):
        """Point ref ``name`` at ``new_ref``.

        Args:
            name: ref to set.
            new_ref: new value (or target ref name when ``symbolic``).
            old_ref: if given, passed to ``set_if_equals`` as the value the
                ref is expected to currently hold.
            message: optional message forwarded to dulwich.
            symbolic: when true, create a symbolic ref instead.

        Raises:
            SCMError: if the compare-and-set is rejected.
        """
        name_b = os.fsencode(name)
        new_ref_b = os.fsencode(new_ref)
        old_ref_b = os.fsencode(old_ref) if old_ref else None
        message_b = message.encode("utf-8") if message else None
        if symbolic:
            return self.repo.refs.set_symbolic_ref(
                name_b, new_ref_b, message=message_b
            )
        if not self.repo.refs.set_if_equals(
            name_b, old_ref_b, new_ref_b, message=message_b
        ):
            raise SCMError(f"Failed to set '{name}'")

    def get_ref(self, name, follow: bool = True) -> Optional[str]:
        """Return the value of ref ``name``, or None if it is unset.

        With ``follow=False`` the ref is read without being resolved; if the
        raw value parses as a symbolic ref, its target name is returned.
        """
        from dulwich.refs import parse_symref_value

        name_b = os.fsencode(name)
        if follow:
            try:
                ref = self.repo.refs[name_b]
            except KeyError:
                ref = None
        else:
            ref = self.repo.refs.read_ref(name_b)
            try:
                if ref:
                    ref = parse_symref_value(ref)
            except ValueError:
                # Not a symbolic ref: keep the raw value read above.
                pass
        if ref:
            return os.fsdecode(ref)
        return None

    def remove_ref(self, name: str, old_ref: Optional[str] = None):
        """Delete ref ``name``.

        ``old_ref``, if given, is passed to ``remove_if_equals`` as the
        expected current value.

        Raises:
            SCMError: if dulwich refuses the removal.
        """
        name_b = name.encode("utf-8")
        old_ref_b = old_ref.encode("utf-8") if old_ref else None
        if not self.repo.refs.remove_if_equals(name_b, old_ref_b):
            raise SCMError(f"Failed to remove '{name}'")

    def iter_refs(self, base: Optional[str] = None):
        """Yield ref names, optionally restricted to those under ``base``.

        Keys returned by dulwich for a given ``base`` are re-joined with
        ``base`` before being yielded.
        """
        base_b = os.fsencode(base) if base else None
        for key in self.repo.refs.keys(base=base_b):
            if base:
                if base.endswith("/"):
                    base = base[:-1]
                yield "/".join([base, os.fsdecode(key)])
            else:
                yield os.fsdecode(key)

    def iter_remote_refs(self, url: str, base: Optional[str] = None):
        """Yield the ref names advertised by the remote ``url``.

        Args:
            url: remote name or URL, resolved through ``get_remote_repo``.
            base: if given, only refs starting with this prefix are yielded.

        Raises:
            SCMError: if the remote cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.porcelain import get_remote_repo

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        if base:
            # Encode the prefix once rather than for every remote ref.
            base_b = os.fsencode(base)
            yield from (
                os.fsdecode(ref)
                for ref in client.get_refs(path)
                if ref.startswith(base_b)
            )
        else:
            yield from (os.fsdecode(ref) for ref in client.get_refs(path))

    def get_refs_containing(self, rev: str, pattern: Optional[str] = None):
        raise NotImplementedError

    def push_refspec(
        self,
        url: str,
        src: Optional[str],
        dest: str,
        force: bool = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Push local ref(s) ``src`` to ``dest`` on the remote ``url``.

        Args:
            url: remote name or URL.
            src: local ref; a trailing ``/`` pushes every ref beneath it,
                and ``None`` sends ``ZERO_SHA`` for ``dest``.
            dest: destination ref (or prefix when ``src`` ends with ``/``).
            force: overwrite diverged remote refs unconditionally.
            on_diverged: callback ``(ref, remote_sha) -> bool`` consulted
                for diverged refs when ``force`` is false; a falsy result
                skips that ref.

        Raises:
            SCMError: if the remote is invalid or the push fails.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.errors import NotGitRepository, SendPackError
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        dest_refs, values = self._push_dest_refs(src, dest)

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        def update_refs(refs):
            # Called by dulwich with the remote's current refs; returns the
            # refs that should be updated on the remote.
            new_refs = {}
            for ref, value in zip(dest_refs, values):
                if ref in refs:
                    local_sha = self.repo.refs[ref]
                    remote_sha = refs[ref]
                    try:
                        check_diverged(self.repo, remote_sha, local_sha)
                    except DivergedBranches:
                        if not force:
                            overwrite = False
                            if on_diverged:
                                overwrite = on_diverged(
                                    os.fsdecode(ref),
                                    os.fsdecode(remote_sha),
                                )
                            if not overwrite:
                                continue
                new_refs[ref] = value
            return new_refs

        try:
            with Tqdm(
                desc="Pushing git refs", bar_format=self.BAR_FMT_NOTOTAL
            ) as pbar:

                def progress(msg_b):
                    msg = msg_b.decode("ascii").strip()
                    pbar.update_msg(msg)
                    pbar.refresh()
                    logger.trace(msg)

                client.send_pack(
                    path,
                    update_refs,
                    self.repo.object_store.generate_pack_data,
                    progress=progress,
                )
        except (NotGitRepository, SendPackError) as exc:
            # f-string so that src/url are actually interpolated.
            raise SCMError(
                f"Git failed to push '{src}' to '{url}'"
            ) from exc

    def _push_dest_refs(
        self, src: Optional[str], dest: str
    ) -> Tuple[Iterable[bytes], Iterable[bytes]]:
        """Return parallel lists of destination refs and the values to push."""
        from dulwich.objects import ZERO_SHA

        if src is not None and src.endswith("/"):
            src_b = os.fsencode(src)
            keys = self.repo.refs.subkeys(src_b)
            values = [self.repo.refs[b"".join([src_b, key])] for key in keys]
            dest_refs = [b"".join([os.fsencode(dest), key]) for key in keys]
        else:
            if src is None:
                values = [ZERO_SHA]
            else:
                values = [self.repo.refs[os.fsencode(src)]]
            dest_refs = [os.fsencode(dest)]
        return dest_refs, values

    def fetch_refspecs(
        self,
        url: str,
        refspecs: Iterable[str],
        force: Optional[bool] = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Fetch ``refspecs`` from the remote ``url`` and update local refs.

        Args:
            url: remote name or URL.
            refspecs: refspecs parsed with dulwich's ``parse_reftuples``.
            force: overwrite diverged local refs unconditionally.
            on_diverged: callback ``(local_ref, remote_sha) -> bool``
                consulted for diverged refs when ``force`` is false; a falsy
                result leaves that local ref untouched.

        Raises:
            SCMError: if the remote cannot be resolved to a transport.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.objectspec import parse_reftuples
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        fetch_refs = []

        def determine_wants(remote_refs):
            # Record the parsed (remote, local, force) tuples for use after
            # the fetch, and request only objects missing locally.
            fetch_refs.extend(
                parse_reftuples(
                    remote_refs,
                    self.repo.refs,
                    [os.fsencode(refspec) for refspec in refspecs],
                    force=force,
                )
            )
            return [
                remote_refs[lh]
                for (lh, _, _) in fetch_refs
                if remote_refs[lh] not in self.repo.object_store
            ]

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        with Tqdm(
            desc="Fetching git refs", bar_format=self.BAR_FMT_NOTOTAL
        ) as pbar:

            def progress(msg_b):
                msg = msg_b.decode("ascii").strip()
                pbar.update_msg(msg)
                pbar.refresh()
                logger.trace(msg)

            fetch_result = client.fetch(
                path,
                self.repo,
                progress=progress,
                determine_wants=determine_wants,
            )
        for (lh, rh, _) in fetch_refs:
            try:
                if rh in self.repo.refs:
                    check_diverged(
                        self.repo, self.repo.refs[rh], fetch_result.refs[lh]
                    )
            except DivergedBranches:
                if not force:
                    overwrite = False
                    if on_diverged:
                        overwrite = on_diverged(
                            os.fsdecode(rh),
                            os.fsdecode(fetch_result.refs[lh]),
                        )
                    if not overwrite:
                        continue
            self.repo.refs[rh] = fetch_result.refs[lh]

    def _stash_iter(self, ref: str):
        """Yield the entries of the stash stored at ``ref``."""
        stash = self._get_stash(ref)
        yield from stash.stashes()

    def _stash_push(
        self,
        ref: str,
        message: Optional[str] = None,
        include_untracked: Optional[bool] = False,
    ) -> Tuple[Optional[str], bool]:
        """Push a new entry onto the stash at ``ref``.

        Returns:
            A ``(rev, True)`` tuple where ``rev`` is the decoded value
            returned by dulwich's ``stash.push``.

        Raises:
            NotImplementedError: for ``include_untracked`` or the default
                stash ref.
            SCMError: if the Git user identity is not configured.
        """
        from dulwich.repo import InvalidUserIdentity

        from dvc.scm.git import Stash

        if include_untracked or ref == Stash.DEFAULT_STASH:
            # dulwich stash.push does not support include_untracked and does
            # not touch working tree
            raise NotImplementedError

        stash = self._get_stash(ref)
        message_b = message.encode("utf-8") if message else None
        try:
            rev = stash.push(message=message_b)
        except InvalidUserIdentity as exc:
            raise SCMError(
                "Git username and email must be configured"
            ) from exc
        return os.fsdecode(rev), True

    def _stash_apply(self, rev: str):
        raise NotImplementedError

    def _stash_drop(self, ref: str, index: int):
        """Drop entry ``index`` from the stash at ``ref``.

        Raises:
            NotImplementedError: for the default stash ref.
            SCMError: if dulwich raises ``ValueError`` while dropping.
        """
        from dvc.scm.git import Stash

        if ref == Stash.DEFAULT_STASH:
            raise NotImplementedError

        stash = self._get_stash(ref)
        try:
            stash.drop(index)
        except ValueError as exc:
            raise SCMError("Failed to drop stash entry") from exc

    def describe(
        self,
        rev: str,
        base: Optional[str] = None,
        match: Optional[str] = None,
        exclude: Optional[str] = None,
    ) -> Optional[str]:
        """Return the first ref under ``base`` whose value equals ``rev``.

        ``base`` defaults to ``refs/tags``. Refs are filtered with the
        ``match`` / ``exclude`` fnmatch patterns when given. Returns None if
        no ref matches.
        """
        if not base:
            base = "refs/tags"
        for ref in self.iter_refs(base=base):
            if (match and not fnmatch.fnmatch(ref, match)) or (
                exclude and fnmatch.fnmatch(ref, exclude)
            ):
                continue
            if self.get_ref(ref, follow=False) == rev:
                return ref
        return None

    def diff(self, rev_a: str, rev_b: str, binary=False) -> str:
        """Return the tree diff between ``rev_a`` and ``rev_b`` as text.

        NOTE(review): ``binary`` is accepted but not used by this body.
        """
        from dulwich.patch import write_tree_diff

        commit_a = self.repo[os.fsencode(rev_a)]
        commit_b = self.repo[os.fsencode(rev_b)]

        buf = BytesIO()
        write_tree_diff(
            buf, self.repo.object_store, commit_a.tree, commit_b.tree
        )
        return buf.getvalue().decode("utf-8")

    def reset(self, hard: bool = False, paths: Optional[Iterable[str]] = None):
        raise NotImplementedError

    def checkout_index(
        self,
        paths: Optional[Iterable[str]] = None,
        force: bool = False,
        ours: bool = False,
        theirs: bool = False,
    ):
        raise NotImplementedError

    def status(
        self, ignored: bool = False
    ) -> Tuple[Mapping[str, Iterable[str]], Iterable[str], Iterable[str]]:
        """Return ``(staged, unstaged, untracked)`` with decoded path names.

        ``staged`` maps each status key to its paths; keys with no paths
        are omitted.
        """
        from dulwich.porcelain import status as git_status

        staged, unstaged, untracked = git_status(
            self.root_dir, ignored=ignored
        )
        return (
            {
                status: [os.fsdecode(name) for name in paths]
                for status, paths in staged.items()
                if paths
            },
            [os.fsdecode(name) for name in unstaged],
            [os.fsdecode(name) for name in untracked],
        )

    def _reset(self) -> None:
        """Drop the cached ``ignore_manager`` so it is rebuilt on next use."""
        self.__dict__.pop("ignore_manager", None)

    def merge(
        self,
        rev: str,
        commit: bool = True,
        msg: Optional[str] = None,
        squash: bool = False,
    ) -> Optional[str]:
        raise NotImplementedError
class Wiki(HookMixin):
    """A wiki stored in a Git repository accessed through dulwich."""

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at ``path``, initialising it if missing."""
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: {0}".format(self.path)

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns
        """
        def as_text(value):
            # Normalise to text first: formatting bytes into a str on
            # Python 3 would embed the b'...' repr in the identity.
            return value.decode('utf-8') if isinstance(value, bytes) else value

        if isinstance(message, text_type):
            message = message.encode('utf-8')

        identity = u"{0} <{1}>".format(as_text(name), as_text(email))
        author = committer = identity.encode('utf-8')

        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: WikiPage for ``name`` at ``sha``
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            # Look the entry up once instead of once per field.
            entry = index[name]
            rv.append(dict(name=filename_to_cname(name),
                           filename=name,
                           ctime=entry.ctime[0],
                           mtime=entry.mtime[0],
                           sha=entry.sha,
                           size=entry.size))

        return rv
#!/usr/bin/env python3
# Demo script: create (or open) a dulwich repository under ~/.myrepo,
# write one file into it, stage it and make a commit.
import os
import sys
import socket
import traceback
import json
import yaml
import getpass

from dulwich.repo import Repo

repo_path = os.path.realpath(os.path.expanduser('~/.myrepo'))
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(repo_path, exist_ok=True)

if not os.path.exists(os.path.join(repo_path, '.git')):
    repo = Repo.init(repo_path)
else:
    repo = Repo(repo_path)
yaml.dump(repo, sys.stdout)

index = repo.open_index()
MSG = f' repo index path={index.path}, index list={list(index)}, '
yaml.dump(MSG, sys.stdout)

# The context manager closes the file even if the write raises.
with open(os.path.join(repo_path, 'foo'), 'wb') as f:
    f.write(b"monty1")

repo.stage([b"foo"])
print(",".join(
    [f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))

commit_id = repo.do_commit(
    b"The first commit")  #, committer=getpass.getuser().encode())
print(f' commit_id={commit_id}, repo_head = {repo.head()} ')
#repo = Repo("myrepo")
def find_git_mtimes(self, context, silent_build):
    """
    Use git to find the mtimes of the files we care about

    ``context`` supplies ``use_git_timestamps`` (falsy, ``True`` or an
    iterable of glob patterns), ``parent_dir``, ``git_root``, ``exclude``
    and ``include``.

    Returns a dict mapping each selected file's path, relative to
    ``context.parent_dir``, to the author time of the newest commit that
    changed it. Returns ``{}`` when git timestamps are not requested.

    Raises HarpoonError if the repository is a shallow clone.
    """
    if not context.use_git_timestamps:
        return {}

    parent_dir = context.parent_dir
    root_folder = context.git_root

    # Can't use git timestamps if it's just a shallow clone
    # Otherwise all the files get the timestamp of the latest commit
    if os.path.exists(os.path.join(root_folder, ".git", "shallow")):
        raise HarpoonError("Can't get git timestamps from a shallow clone",
                           directory=parent_dir)

    git = Repo(root_folder)
    mtimes = {}
    all_files = set(git.open_index())
    use_files = set()

    # A non-bool use_git_timestamps is a collection of globs restricting
    # which files we want timestamps for; True means "no restriction".
    timestamp_globs = context.use_git_timestamps
    if isinstance(timestamp_globs, bool):
        timestamp_globs = None

    exclude = context.exclude
    include = context.include

    for filename in all_files:
        relpath = os.path.relpath(os.path.join(root_folder, filename),
                                  parent_dir)

        # Only include files under the parent_dir
        if relpath.startswith("../"):
            continue

        # Ignore files that we don't want git_timestamps from
        if timestamp_globs is not None and not any(
                fnmatch.fnmatch(relpath, line) for line in timestamp_globs):
            continue

        # Matched is true by default if
        # * Have exclude
        # * No exclude and no include
        matched = bool(exclude) or not include

        # Anything not matching exclude gets included
        if exclude and any(fnmatch.fnmatch(relpath, line) for line in exclude):
            matched = False

        # Anything matching include gets included
        if include and any(fnmatch.fnmatch(relpath, line) for line in include):
            matched = True

        # Either didn't match any exclude or matched an include
        if matched:
            use_files.add(filename)

    if not silent_build:
        log.info(
            "Finding modified times for %s/%s git controlled files in %s",
            len(use_files), len(all_files), root_folder)

    for entry in git.get_walker(paths=use_files):
        date = entry.commit.author_time
        for changes in entry.changes():
            # A walker entry yields either one change or a list of them
            if not isinstance(changes, list):
                changes = [changes]
            for change in changes:
                path = change.new.path
                if not (root_folder and path and parent_dir):
                    continue
                # Cheap membership test first; skip the path arithmetic for
                # files we were never asked about.
                if path not in use_files:
                    continue
                new_relpath = os.path.relpath(
                    os.path.join(root_folder, path), parent_dir)
                if new_relpath.startswith("../"):
                    continue
                if mtimes.get(new_relpath, 0) < date:
                    mtimes[new_relpath] = date

        # Stop walking history once every wanted file has a timestamp.
        # NOTE(review): use_files holds paths relative to the git root while
        # mtimes is keyed relative to parent_dir, so this early exit only
        # triggers when those two directories coincide -- confirm intent.
        if use_files.issubset(mtimes):
            break

    return mtimes
class Repo(object):
    """
    Wrapper around a libgit Repository that knows:

    * How to get all the files in the repository
    * How to get the oid of HEAD
    * How to get the commit times of the files we want commit times for

    It's written with speed in mind, given the constraints of making
    performant code in python!
    """

    def __init__(self, root_folder):
        self.git = Repository(root_folder)

    def all_files(self):
        """Return a set of all the files under git control"""
        return {path.decode() for path, _ in self.git.open_index().items()}

    @property
    def first_commit(self):
        """Return the oid of HEAD"""
        return self.git.head().decode()

    def file_commit_times(self, use_files_paths, debug=False):
        """
        Traverse the commits in the repository, starting from HEAD until we
        have found the commit times for all the files we care about.

        Yield ``(commit sha hexdigest, commit_time, [paths])`` for each
        commit in which at least one still-wanted file is found to differ
        from the commit's parents. Each file is yielded once only.

        ``debug`` is accepted for backwards compatibility; this method does
        not currently use it.
        """
        # Local import so this block does not depend on the module header.
        from collections import deque

        prefixes = PrefixTree()
        prefixes.fill(use_files_paths)

        for entry in self.git.get_walker():
            # Commit time taking into account the timezone
            commit_time = entry.commit.commit_time - entry.commit.commit_timezone

            # Get us the two different tree structures between parents and current
            parent_trees = [
                self.git.get_object(oid).tree for oid in entry.commit.parents
            ]
            cf_and_pf, changes = self.tree_structures_for(
                (), entry.commit.tree, parent_trees, prefixes)

            # Deep dive into any differences
            difference = []
            if changes:
                # FIFO work queue; deque gives O(1) pops from the left where
                # list.pop(0) shifts every remaining element.
                pending = deque([(cf_and_pf, changes)])
                while pending:
                    files_pair, pending_changes = pending.popleft()
                    for thing, sub_changes, is_path in self.differences_between(
                            files_pair[0], files_pair[1], pending_changes,
                            prefixes):
                        if is_path:
                            found = prefixes.remove(thing[:-1], thing[-1])
                            if found:
                                difference.append('/'.join(thing))
                        else:
                            pending.append((thing, sub_changes))

            # Only yield if there was a difference
            if difference:
                yield entry.commit.sha().hexdigest(), commit_time, difference

            # If nothing remains, then break!
            if not prefixes:
                break

    def entries_in_tree_oid(self, prefix, tree_oid):
        """Find the tree at this oid and return entries prefixed with ``prefix``"""
        try:
            tree = self.git.get_object(tree_oid)
        except KeyError:
            log.warning("Couldn't find object {0}".format(tree_oid))
            return empty
        else:
            return frozenset(self.entries_in_tree(prefix, tree))

    def entries_in_tree(self, prefix, tree):
        """
        Traverse the entries in this tree and yield (prefix, is_tree, oid)

        Where prefix is a tuple of the given prefix and the name of the entry.
        """
        for entry in tree.items():
            name = entry.path.decode()
            new_prefix = prefix + (name, ) if prefix else (name, )
            yield (new_prefix, stat.S_ISDIR(entry.mode), entry.sha)

    def tree_structures_for(self, prefix, current_oid, parent_oids, prefixes):
        """
        Return the entries for this commit, the entries of the parent commits,
        and the difference between the two (current_files - parent_files)

        The result is ``((current_files, parent_files), changes)``, or
        ``(empty, empty)`` when ``prefix`` is not one we are looking for.
        """
        if prefix and prefixes and prefix not in prefixes:
            return empty, empty

        parent_files = set()
        for oid in parent_oids:
            parent_files.update(self.entries_in_tree_oid(prefix, oid))

        current_files = self.entries_in_tree_oid(prefix, current_oid)
        return (current_files, parent_files), (current_files - parent_files)

    def differences_between(self, current_files, parent_files, changes, prefixes):
        """
        yield (thing, changes, is_path)

        If is_path is true, changes is None and thing is the path as a tuple.

        If is_path is false, thing is the current_files and parent_files for
        that changed treeentry and changes is the difference between
        current_files and parent_files.

        The code here is written to squeeze as much performance as possible
        out of this operation.
        """
        parent_oid = None
        parent_values = None

        # Parent tree oids are only needed when some change is itself a tree
        if any(is_tree for _, is_tree, _ in changes):
            if len(changes) == 1:
                # Single change: collect its parent oids directly
                wanted_path = list(changes)[0][0]
                parent_oid = frozenset([
                    oid for path, is_tree, oid in parent_files
                    if path == wanted_path and is_tree
                ])
            else:
                # Several changes: index the parent-only tree entries by path
                parent_values = defaultdict(set)
                for path, is_tree, oid in parent_files - current_files:
                    if is_tree:
                        parent_values[path].add(oid)

        for path, is_tree, oid in changes:
            if is_tree and path not in prefixes:
                continue

            if not is_tree:
                yield path, None, True
            else:
                if parent_oid is not None:
                    parent_oids = parent_oid
                else:
                    parent_oids = parent_values.get(path, empty)
                cf_and_pf, sub_changes = self.tree_structures_for(
                    path, oid, parent_oids, prefixes)
                if sub_changes:
                    yield cf_and_pf, sub_changes, False
class Unleash(object):
    """Drives the release process: builds release/dev commits and runs plugins."""

    def _create_child_commit(self, parent_ref):
        """Return a new, unsaved commit whose only parent is ``parent_ref``.

        Author and committer come from ``opts['author']`` or, when that is
        None, from the ``user.name``/``user.email`` git configuration. Both
        timestamps are set to the current time in the local timezone.

        Raises InvocationError if ``parent_ref`` is ambiguous or cannot be
        resolved.
        """
        parent = ResolvedRef(self.repo, parent_ref)

        if not parent.is_definite:
            raise InvocationError('{} is ambiguous: {}'.format(
                parent.ref, parent.full_name
            ))

        if not parent.found:
            raise InvocationError('Could not resolve "{}"'.format(parent.ref))

        # prepare the release commit
        commit = MalleableCommit.from_existing(
            self.repo, parent.id
        )

        # update author and such
        if opts['author'] is None:
            commit.author = '{} <{}>'.format(
                self.gitconfig.get('user', 'name'),
                self.gitconfig.get('user', 'email'),
            )
            # Was misspelled "commiter", which only created a stray attribute
            # and left the real committer untouched.
            commit.committer = commit.author
        else:
            commit.author = opts['author']
            commit.committer = opts['author']

        now = int(time.time())
        ltz = get_local_timezone(now)

        commit.author_time = now
        commit.author_timezone = ltz

        commit.commit_time = now
        commit.commit_timezone = ltz

        commit.parent_ids = [parent.id]

        return commit

    def __init__(self, plugins=None):
        """Store the plugin collection that is notified at every step.

        :param plugins: Plugin collection; defaults to a fresh empty list so
                        instances never share one mutable default.
        """
        self.plugins = [] if plugins is None else plugins

    def _init_repo(self):
        """Open the repository at ``opts['root']`` and load its config stack."""
        self.repo = Repo(opts['root'])
        self.gitconfig = self.repo.get_config_stack()

    def _perform_step(self, signal_name):
        """Notify all plugins of ``signal_name`` and log how long it took."""
        log.debug('begin: {}'.format(signal_name))
        begin = time.time()

        # create new top-level context
        with new_local_stack() as nc:
            nc['issues'] = issues.channel(signal_name)
            self.plugins.notify(signal_name)

        duration = time.time() - begin
        log.debug('end: {}, took {:.4f}s'.format(signal_name, duration))

    def create_release(self, ref):
        """Create a release commit and a follow-up dev commit on top of ``ref``.

        Runs the collect_info, prepare_release, lint_release and prepare_dev
        steps, tags the release commit and, when ``ref`` is a branch, moves
        that branch to the dev commit. Nothing is saved when
        ``opts['dry_run']`` is set. A PluginError aborts the release quietly.
        """
        with new_local_stack() as nc:
            # resolve reference
            base_ref = ResolvedRef(self.repo, ref)
            log.debug(
                'Base ref: {} ({})'.format(base_ref.full_name, base_ref.id)
            )
            orig_tree = base_ref.get_object().tree

            # initialize context
            nc['commit'] = self._create_child_commit(ref)
            nc['issues'] = IssueCollector(log=log)
            nc['info'] = {'ref': base_ref}
            nc['log'] = log

            try:
                self._perform_step('collect_info')
                log.debug('info: {}'.format(pformat(info)))

                self._perform_step('prepare_release')
                self._perform_step('lint_release')

                if opts['inspect']:
                    log.info(unicode(commit))
                    # check out to temporary directory
                    with TempDir() as inspect_dir:
                        commit.export_to(inspect_dir)

                        log.info(
                            'You are being dropped into an interactive shell '
                            'inside a temporary checkout of the release '
                            'commit. No changes you make will persist. Exit '
                            'the shell to abort the release process.\n\n'
                            'Use "exit 2" to continue the release.'
                        )

                        status = run_user_shell(cwd=inspect_dir)

                    # only exit code 2 means "go ahead"
                    if status != 2:
                        raise InvocationError(
                            'Aborting release, got exit code {} from shell.'.
                            format(status))

                # save release commit
                release_commit = nc['commit']

                # we're done with the release, now create the dev commit
                nc['commit'] = self._create_child_commit(ref)
                nc['issues'] = IssueCollector(log=log)

                # creating development commit
                self._perform_step('prepare_dev')

                if opts['dry_run']:
                    log.info('Not saving created commits. Dry-run successful.')
                    return

                # we've got both commits, now tag the release
                confirm_prompt(
                    'Advance dev to {} and release {}?'
                    .format(info['dev_version'], info['release_version'])
                )

                release_tag = 'refs/tags/{}'.format(info['release_version'])

                if release_tag in self.repo.refs:
                    confirm_prompt(
                        'Repository already contains {}, really overwrite tag?'
                        .format(release_tag),
                    )

                release_hash = release_commit.save()

                log.info('{}: {}'.format(release_tag, release_hash))
                self.repo.refs[release_tag] = release_hash

                # save the dev commit
                dev_hash = nc['commit'].save()

                # if our release commit formed from a branch, we set that branch
                # to our new dev commit
                assert base_ref.is_definite and base_ref.found
                if not base_ref.is_ref or\
                        not base_ref.full_name.startswith('refs/heads'):
                    log.warning('Release commit does not originate from a '
                                'branch; dev commit will not be reachable.')
                    log.info('Dev commit: {}'.format(dev_hash))
                else:
                    self.repo.refs[base_ref.full_name] = dev_hash

                    # change the branch to point at our new dev commit
                    log.info('{}: {}'.format(
                        base_ref.full_name, dev_hash
                    ))

                    self._update_working_copy(base_ref, orig_tree)
            except PluginError:
                # just abort, error has been logged already
                log.debug('Exiting due to PluginError')
                return

    def _update_working_copy(self, base_ref, orig_tree):
        """Reset index and working copy to ``base_ref`` when it is safe.

        Does nothing when HEAD is not a symbolic ref to ``base_ref``, when the
        repository has no index, or when the index has staged changes
        relative to ``orig_tree``.
        """
        head_ref = ResolvedRef(self.repo, 'HEAD')
        if not head_ref.is_definite or not head_ref.is_symbolic\
                or not head_ref.target == base_ref.full_name:
            # The placeholder was previously logged verbatim because the
            # format() call was missing.
            log.info('HEAD is not a symbolic ref to {}, leaving your '
                     'working copy untouched.'.format(base_ref.full_name))
            return

        if not self.repo.has_index():
            log.info('Repository has no index, not updating working copy.')
            return

        index = self.repo.open_index()

        changes = list(index.changes_from_tree(
            self.repo.object_store,
            orig_tree,
        ))

        if changes:
            log.warning('There are staged changes in your index. Will not '
                        'update working copy.\n\n'
                        'You will need to manually change your HEAD to '
                        '{}.'.format(base_ref.id))
            return

        # reset the index to the new dev commit
        confirm_prompt(
            'Do you want to reset your index to the new dev commit and check '
            'it out? Unsaved changes to your working copy may be overwritten!'
        )
        log.info('Resetting index and checking out dev commit.')
        build_index_from_tree(
            self.repo.path,
            self.repo.index_path(),
            self.repo.object_store,
            base_ref.get_object().tree,
        )

    def publish(self, ref):
        """Run the publish steps for ``ref``.

        When ``ref`` is None, the tag whose commit has the newest commit time
        is published instead. A PluginError aborts quietly.
        """
        if ref is None:
            # Indexed access instead of a tuple-unpacking lambda parameter,
            # which is a syntax error on Python 3.
            tags = sorted(
                (t for t in self.repo.refs.as_dict().iteritems()
                 if t[0].startswith('refs/tags')),
                key=lambda item: self.repo[item[1]].commit_time,
                reverse=True,
            )

            if not tags:
                log.error('Could not find a tag to publish.')
                return

            ref = tags[0][0]

        pref = ResolvedRef(self.repo, ref)

        with new_local_stack() as nc:
            nc['commit'] = MalleableCommit.from_existing(self.repo, pref.id)
            log.debug('Release tag: {}'.format(commit))
            nc['issues'] = IssueCollector(log=log)
            nc['info'] = {'ref': pref}
            nc['log'] = log

            try:
                self._perform_step('collect_info')
                log.debug('info: {}'.format(pformat(info)))
                self._perform_step('publish_release')
            except PluginError:
                log.debug('Exiting due to PluginError')
                return
class Efesto:
    """WSGI callable that serves ``.html`` and ``.rst`` pages from a git index.

    Pages are looked up by path in the repository index; header, footer and
    not-found templates are searched for in the page's directory and then in
    each parent directory up to the repository root.
    """

    def _read_blob(self, item):
        """Return the raw content of the blob the index records for ``item``."""
        # Field 8 of the index entry is used as the object id.
        # NOTE(review): presumably the sha slot of dulwich's tuple-style
        # index entries -- confirm against the dulwich version in use.
        sha = self.git_index[item][8]
        return self.repo[sha].as_raw_string()

    def render_html(self, item):
        """Serve ``item`` as HTML wrapped in the header and footer templates."""
        self.start_response('200 OK',
                            [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = self.apply_vars(self._read_blob(item))
        return [header, body, footer]

    def render_rst(self, item):
        """Render the reStructuredText ``item`` to HTML and serve it."""
        self.start_response('200 OK',
                            [('Content-Type', 'text/html; charset=utf-8')])
        source = self._read_blob(item)
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        html_body = publish_parts(self.apply_vars(source),
                                  writer_name='html')['html_body']
        body = unicode(html_body).encode('utf8')
        return [header, self.prefix, body, self.suffix, footer]

    def __init__(self, path='.', prefix='', suffix='', header='header.html',
                 footer='footer.html', notfound='notfound.html'):
        """Open the repository at ``path`` and remember the template names.

        ``prefix`` and ``suffix`` are emitted around rendered rst bodies.
        """
        self.repo = Repo(path)
        self.prefix = prefix
        self.suffix = suffix
        self.header = header
        self.footer = footer
        self.notfound = notfound
        # file extension -> renderer
        self.allowed_ext = {'html': self.render_html, 'rst': self.render_rst}

    def __call__(self, environ, start_response):
        """WSGI entry point: serve the page named by ``PATH_INFO``."""
        self.start_response = start_response
        self.env = environ
        # Re-open the index on every request
        self.git_index = self.repo.open_index()
        requested_item = environ['PATH_INFO'][1:].rstrip('/')
        if requested_item == '':
            requested_item = 'index'
        self.page = requested_item
        return self.render_page()

    def render_page(self):
        """Serve the current page with the first extension found in the index."""
        for ext, renderer in self.allowed_ext.items():
            item = "%s.%s" % (self.page, ext)
            if item in self.git_index:
                return renderer(item)
        return self.render_notfound()

    def apply_vars(self, body):
        """Replace every ``|KEY|`` in ``body`` with the WSGI environ value."""
        for key, value in self.env.items():
            body = body.replace("|%s|" % key, str(value))
        return body

    def get_html_template(self, html):
        """Return template ``html`` from the nearest directory above the page.

        Walks from the page's directory up to the repository root and returns
        the first match with variables applied, or ``''`` when none exists.
        """
        path = self.page
        while True:
            current_path = path
            path = '/'.join(path.split('/')[:-1])
            if current_path == path:
                break
            # Only strip leading slashes. The previous unconditional "[1:]"
            # also ate the first character of a non-empty directory name, so
            # templates in nested directories could never be found.
            item = ("%s/%s" % (path, html)).lstrip('/')
            if item in self.git_index:
                return self.apply_vars(self._read_blob(item))
        return ''

    def render_notfound(self):
        """Serve a 404 using the not-found template or a built-in fallback."""
        self.start_response('404 Not Found',
                            [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = self.get_html_template(self.notfound)
        if body == '':
            body = '<h1>Not Found</h1>'
        return [header, body, footer]
#!/usr/bin/env python2 import os.path import urlparse from email.utils import formatdate from dulwich.repo import Repo from dulwich.objects import Blob, Tree, Commit from docutils import io, nodes from docutils.core import publish_doctree, publish_from_doctree from render import MyWriter repo = Repo(".") commit_sha = repo.head() commit = repo.get_object(commit_sha) index = repo.open_index() assert not list(index.changes_from_tree(repo.object_store, commit.tree)), "uncommited changes" store = repo.object_store def render_rst(blob, path): doc = publish_doctree(blob.as_raw_string()) for node in doc.traverse(nodes.reference): uri = urlparse.urlparse(node['refuri']) if not uri.netloc and os.path.basename(uri.path) == "README.rst": node['refuri'] = urlparse.urlunparse( (uri.scheme, uri.netloc, uri.path[:-10] or "./", uri.params, uri.query, uri.fragment)) output = publish_from_doctree( doc, destination_path=path,
else: file_name = item[www+2:dotsomething] #print file_name outfile = open('./' + file_name + '.csv', "ab") writer = csv.writer(outfile) writer.writerow([clean_url]) #final_list.append(clean_url) #print final_list #remove '#' at beginning of line to print all pdf urls #auto commit repo = Repo("unscraper") repo index = repo.open_index() print(index.path.decode(sys.getfilesystemencoding())) list(index) f = open('unscraper/thisIsATest.md','wb') _ = f.write(b"monty") f.close() repo.stage([b"thisIsATest"]) print(",".join([f.decode(sys.getfilesystemencoding()) for f in repo.open_index()])) commit_id = repo.do_commit( b"testing dulwich", committer=b"Aly <*****@*****.**>")
class DulwichBackend(BaseGitBackend):  # pylint:disable=abstract-method
    """Dulwich Git backend.

    Operations without an implementation here raise NotImplementedError.
    """

    def __init__(  # pylint:disable=W0231
        self, root_dir=os.curdir, search_parent_directories=True
    ):
        """Open the repository at (or, optionally, above) ``root_dir``.

        Raises SCMError if no git repository is found.
        """
        from dulwich.errors import NotGitRepository
        from dulwich.repo import Repo

        try:
            if search_parent_directories:
                self.repo = Repo.discover(start=root_dir)
            else:
                self.repo = Repo(root_dir)
        except NotGitRepository as exc:
            raise SCMError(f"{root_dir} is not a git repository") from exc

        # stash ref name -> dulwich Stash object, filled lazily by _get_stash
        self._stashes: dict = {}

    def close(self):
        self.repo.close()

    @property
    def root_dir(self) -> str:
        return self.repo.path

    @staticmethod
    def clone(
        url: str,
        to_path: str,
        rev: Optional[str] = None,
        shallow_branch: Optional[str] = None,
    ):
        raise NotImplementedError

    @staticmethod
    def is_sha(rev: str) -> bool:
        raise NotImplementedError

    @property
    def dir(self) -> str:
        return self.repo.commondir()

    def add(self, paths: Iterable[str]):
        """Stage ``paths``; directories are expanded to the files they contain."""
        from dvc.utils.fs import walk_files

        if isinstance(paths, str):
            paths = [paths]

        files = []
        for path in paths:
            if not os.path.isabs(path):
                path = os.path.join(self.root_dir, path)
            if os.path.isdir(path):
                files.extend(walk_files(path))
            else:
                files.append(path)

        for fpath in files:
            # NOTE: this doesn't check gitignore, same as GitPythonBackend.add
            self.repo.stage(relpath(fpath, self.root_dir))

    def commit(self, msg: str):
        from dulwich.porcelain import commit

        commit(self.root_dir, message=msg)

    def checkout(
        self, branch: str, create_new: Optional[bool] = False, **kwargs,
    ):
        raise NotImplementedError

    def pull(self, **kwargs):
        raise NotImplementedError

    def push(self):
        raise NotImplementedError

    def branch(self, branch: str):
        raise NotImplementedError

    def tag(self, tag: str):
        raise NotImplementedError

    def untracked_files(self) -> Iterable[str]:
        raise NotImplementedError

    def is_tracked(self, path: str) -> bool:
        """Return True if ``path`` is in the index, or is a directory
        containing at least one index entry."""
        from dvc.path_info import PathInfo

        rel = PathInfo(path).relative_to(self.root_dir).as_posix().encode()
        rel_dir = rel + b"/"
        # Distinct loop name so the ``path`` argument is not shadowed.
        for tracked in self.repo.open_index():
            if tracked == rel or tracked.startswith(rel_dir):
                return True
        return False

    def is_dirty(self, **kwargs) -> bool:
        raise NotImplementedError

    def active_branch(self) -> str:
        raise NotImplementedError

    def list_branches(self) -> Iterable[str]:
        raise NotImplementedError

    def list_tags(self) -> Iterable[str]:
        raise NotImplementedError

    def list_all_commits(self) -> Iterable[str]:
        raise NotImplementedError

    def get_tree(self, rev: str, **kwargs) -> BaseTree:
        raise NotImplementedError

    def get_rev(self) -> str:
        raise NotImplementedError

    def resolve_rev(self, rev: str) -> str:
        raise NotImplementedError

    def resolve_commit(self, rev: str) -> str:
        raise NotImplementedError

    def branch_revs(self, branch: str, end_rev: Optional[str] = None):
        raise NotImplementedError

    def _get_stash(self, ref: str):
        """Return the (cached) dulwich Stash object for ``ref``."""
        from dulwich.stash import Stash as DulwichStash

        if ref not in self._stashes:
            self._stashes[ref] = DulwichStash(self.repo, ref=os.fsencode(ref))
        return self._stashes[ref]

    def is_ignored(self, path):
        from dulwich import ignore

        manager = ignore.IgnoreFilterManager.from_repo(self.repo)
        return manager.is_ignored(relpath(path, self.root_dir))

    def set_ref(
        self,
        name: str,
        new_ref: str,
        old_ref: Optional[str] = None,
        message: Optional[str] = None,
        symbolic: Optional[bool] = False,
    ):
        """Point ref ``name`` at ``new_ref``.

        With ``symbolic`` a symbolic ref is written. Otherwise the ref is
        updated only if it currently equals ``old_ref`` (when given).

        Raises SCMError if the conditional update fails.
        """
        name_b = os.fsencode(name)
        new_ref_b = os.fsencode(new_ref)
        old_ref_b = os.fsencode(old_ref) if old_ref else None
        message_b = message.encode("utf-8") if message else None
        if symbolic:
            # Pass the encoded message, as the non-symbolic path below does;
            # the un-encoded str was passed here before.
            return self.repo.refs.set_symbolic_ref(
                name_b, new_ref_b, message=message_b
            )
        if not self.repo.refs.set_if_equals(
            name_b, old_ref_b, new_ref_b, message=message_b
        ):
            raise SCMError(f"Failed to set '{name}'")

    def get_ref(self, name, follow: Optional[bool] = True) -> Optional[str]:
        """Return the value of ref ``name``, or None if it is unset.

        With ``follow`` the ref is read through the refs container's item
        lookup; otherwise the raw ref is read and, if it is a symref, the
        name it points at is returned.
        """
        from dulwich.refs import parse_symref_value

        name_b = os.fsencode(name)
        if follow:
            try:
                ref = self.repo.refs[name_b]
            except KeyError:
                ref = None
        else:
            ref = self.repo.refs.read_ref(name_b)
            try:
                if ref:
                    ref = parse_symref_value(ref)
            except ValueError:
                # not a symref: keep the raw value
                pass
        if ref:
            return os.fsdecode(ref)
        return None

    def remove_ref(self, name: str, old_ref: Optional[str] = None):
        """Remove ref ``name`` (only if it equals ``old_ref`` when given).

        Raises SCMError if the removal fails.
        """
        name_b = name.encode("utf-8")
        old_ref_b = old_ref.encode("utf-8") if old_ref else None
        if not self.repo.refs.remove_if_equals(name_b, old_ref_b):
            raise SCMError(f"Failed to remove '{name}'")

    def iter_refs(self, base: Optional[str] = None):
        """Yield full ref names, optionally restricted to those under ``base``."""
        base_b = os.fsencode(base) if base else None
        # Drop one trailing slash once, up front, instead of per key.
        prefix = base[:-1] if base and base.endswith("/") else base
        for key in self.repo.refs.keys(base=base_b):
            if prefix:
                yield "/".join([prefix, os.fsdecode(key)])
            else:
                yield os.fsdecode(key)

    def iter_remote_refs(self, url: str, base: Optional[str] = None):
        """Yield ref names of the remote ``url``, optionally under ``base``.

        Raises SCMError if ``url`` is not a valid remote or URL.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.porcelain import get_remote_repo

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        if base:
            base_b = os.fsencode(base)
            yield from (
                os.fsdecode(ref)
                for ref in client.get_refs(path)
                if ref.startswith(base_b)
            )
        else:
            yield from (os.fsdecode(ref) for ref in client.get_refs(path))

    def get_refs_containing(self, rev: str, pattern: Optional[str] = None):
        raise NotImplementedError

    def push_refspec(
        self,
        url: str,
        src: Optional[str],
        dest: str,
        force: bool = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Push ``src`` to ``dest`` on the remote ``url``.

        A ref that has diverged on the remote is only overwritten when
        ``force`` is set or ``on_diverged(ref, remote_sha)`` returns True.

        Raises SCMError for an invalid remote or a failed push.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.errors import NotGitRepository, SendPackError
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        dest_refs, values = self._push_dest_refs(src, dest)

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        def update_refs(refs):
            new_refs = {}
            for ref, value in zip(dest_refs, values):
                if ref in refs:
                    local_sha = self.repo.refs[ref]
                    remote_sha = refs[ref]
                    try:
                        check_diverged(self.repo, remote_sha, local_sha)
                    except DivergedBranches:
                        if not force:
                            overwrite = False
                            if on_diverged:
                                overwrite = on_diverged(
                                    os.fsdecode(ref), os.fsdecode(remote_sha),
                                )
                            if not overwrite:
                                continue
                new_refs[ref] = value
            return new_refs

        def progress(msg):
            logger.trace("git send_pack: %s", msg)

        try:
            client.send_pack(
                path,
                update_refs,
                self.repo.object_store.generate_pack_data,
                progress=progress,
            )
        except (NotGitRepository, SendPackError) as exc:
            # f-string prefix was missing, so the placeholders were emitted
            # literally instead of the actual src and url.
            raise SCMError(f"Git failed to push '{src}' to '{url}'") from exc

    def _push_dest_refs(
        self, src: str, dest: str
    ) -> Tuple[Iterable[bytes], Iterable[bytes]]:
        """Return ``(dest_refs, values)`` for a push of ``src`` to ``dest``.

        A ``src`` ending in "/" expands to every ref beneath it; a ``src`` of
        None maps ``dest`` to ZERO_SHA.
        """
        from dulwich.objects import ZERO_SHA

        if src is not None and src.endswith("/"):
            src_b = os.fsencode(src)
            keys = self.repo.refs.subkeys(src_b)
            values = [self.repo.refs[b"".join([src_b, key])] for key in keys]
            dest_refs = [b"".join([os.fsencode(dest), key]) for key in keys]
        else:
            if src is None:
                values = [ZERO_SHA]
            else:
                values = [self.repo.refs[os.fsencode(src)]]
            dest_refs = [os.fsencode(dest)]
        return dest_refs, values

    def fetch_refspecs(
        self,
        url: str,
        refspecs: Iterable[str],
        force: Optional[bool] = False,
        on_diverged: Optional[Callable[[str, str], bool]] = None,
    ):
        """Fetch ``refspecs`` from ``url`` and update the matching local refs.

        A diverged local ref is only overwritten when ``force`` is set or
        ``on_diverged(ref, fetched_sha)`` returns True.

        Raises SCMError if ``url`` is not a valid remote or URL.
        """
        from dulwich.client import get_transport_and_path
        from dulwich.objectspec import parse_reftuples
        from dulwich.porcelain import (
            DivergedBranches,
            check_diverged,
            get_remote_repo,
        )

        fetch_refs = []

        def determine_wants(remote_refs):
            fetch_refs.extend(
                parse_reftuples(
                    remote_refs,
                    self.repo.refs,
                    [os.fsencode(refspec) for refspec in refspecs],
                    force=force,
                )
            )
            # only request objects that are not already in the local store
            return [
                remote_refs[lh]
                for (lh, _, _) in fetch_refs
                if remote_refs[lh] not in self.repo.object_store
            ]

        try:
            _remote, location = get_remote_repo(self.repo, url)
            client, path = get_transport_and_path(location)
        except Exception as exc:
            raise SCMError(
                f"'{url}' is not a valid Git remote or URL"
            ) from exc

        def progress(msg):
            logger.trace("git fetch: %s", msg)

        fetch_result = client.fetch(
            path, self.repo, progress=progress, determine_wants=determine_wants
        )
        for (lh, rh, _) in fetch_refs:
            try:
                if rh in self.repo.refs:
                    check_diverged(
                        self.repo, self.repo.refs[rh], fetch_result.refs[lh]
                    )
            except DivergedBranches:
                if not force:
                    overwrite = False
                    if on_diverged:
                        overwrite = on_diverged(
                            os.fsdecode(rh), os.fsdecode(fetch_result.refs[lh])
                        )
                    if not overwrite:
                        continue
            self.repo.refs[rh] = fetch_result.refs[lh]

    def _stash_iter(self, ref: str):
        stash = self._get_stash(ref)
        yield from stash.stashes()

    def _stash_push(
        self,
        ref: str,
        message: Optional[str] = None,
        include_untracked: Optional[bool] = False,
    ) -> Tuple[Optional[str], bool]:
        """Push a stash entry onto ``ref``; return ``(rev, True)``.

        Raises NotImplementedError for the default stash ref or when
        ``include_untracked`` is requested.
        """
        from dvc.scm.git import Stash

        if include_untracked or ref == Stash.DEFAULT_STASH:
            # dulwich stash.push does not support include_untracked and does
            # not touch working tree
            raise NotImplementedError

        stash = self._get_stash(ref)
        message_b = message.encode("utf-8") if message else None
        rev = stash.push(message=message_b)
        return os.fsdecode(rev), True

    def _stash_apply(self, rev: str):
        raise NotImplementedError

    def reflog_delete(
        self, ref: str, updateref: bool = False, rewrite: bool = False
    ):
        raise NotImplementedError

    def describe(
        self,
        rev: str,
        base: Optional[str] = None,
        match: Optional[str] = None,
        exclude: Optional[str] = None,
    ) -> Optional[str]:
        """Return the first ref under ``base`` (default "refs/tags") whose
        unfollowed value equals ``rev``, honouring the ``match``/``exclude``
        glob patterns; None if there is none."""
        if not base:
            base = "refs/tags"
        for ref in self.iter_refs(base=base):
            if (match and not fnmatch.fnmatch(ref, match)) or (
                exclude and fnmatch.fnmatch(ref, exclude)
            ):
                continue
            if self.get_ref(ref, follow=False) == rev:
                return ref
        return None

    def diff(self, rev_a: str, rev_b: str, binary=False) -> str:
        """Return the tree diff between ``rev_a`` and ``rev_b`` as text.

        ``binary`` is accepted but not used by this implementation.
        """
        from dulwich.patch import write_tree_diff

        commit_a = self.repo[os.fsencode(rev_a)]
        commit_b = self.repo[os.fsencode(rev_b)]

        buf = BytesIO()
        write_tree_diff(
            buf, self.repo.object_store, commit_a.tree, commit_b.tree
        )
        return buf.getvalue().decode("utf-8")

    def reset(self, hard: bool = False, paths: Optional[Iterable[str]] = None):
        raise NotImplementedError

    def checkout_paths(self, paths: Iterable[str], force: bool = False):
        raise NotImplementedError
class Wiki(HookMixin):
    """A wiki whose pages live in the git repository found at ``path``."""

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        """Open the repository at ``path``, creating it when none exists.

        :param path: Filesystem path of the git repository.
        """
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)
            # TODO add first commit here

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever ``Repo.do_commit`` returns for the new commit
        """
        if isinstance(name, text_type):
            name = name.encode('utf-8')
        if isinstance(email, text_type):
            email = email.encode('utf-8')
        if isinstance(message, text_type):
            message = message.encode('utf-8')

        # The old template mixed %-placeholders with str.format(), so nothing
        # was substituted and every commit was attributed to the literal
        # "%s <%s>". name and email are byte strings here, so join as bytes.
        author = committer = name + b" <" + email + b">"

        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: WikiPage for ``name`` at ``sha``
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts, one per path in the index
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            # Look the entry up once instead of once per field.
            entry = index[name]
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     ctime=entry.ctime[0],
                     mtime=entry.mtime[0],
                     sha=entry.sha,
                     size=entry.size))
        return rv
def find_git_mtimes(self, context, silent_build):
    """
    Use git to find the mtimes of the files we care about

    ``context`` supplies ``use_git_timestamps`` (falsy, ``True`` or an
    iterable of glob patterns), ``parent_dir``, ``git_root``, ``exclude``
    and ``include``.

    History is walked from the newest commit until the commit recorded by
    ``self.get_cached_mtimes`` is reached (its cached times are then merged
    in) or every wanted file has a timestamp. The result is stored through
    ``self.set_cached_mtimes`` when the newest commit differs from the
    cached one.

    Returns a dict mapping each file's path, relative to
    ``context.parent_dir``, to the author time of the newest commit that
    changed it. Returns ``{}`` when git timestamps are not requested.

    Raises HarpoonError if the repository is a shallow clone.
    """
    if not context.use_git_timestamps:
        return {}

    parent_dir = context.parent_dir
    root_folder = context.git_root

    # Can't use git timestamps if it's just a shallow clone
    # Otherwise all the files get the timestamp of the latest commit
    if os.path.exists(os.path.join(root_folder, ".git", "shallow")):
        raise HarpoonError("Can't get git timestamps from a shallow clone",
                           directory=parent_dir)

    git = Repo(root_folder)
    mtimes = {}
    all_files = set(git.open_index())
    use_files = set()
    use_files_relpaths = set()

    # A non-bool use_git_timestamps is a collection of globs restricting
    # which files we want timestamps for; True means "no restriction".
    timestamp_globs = context.use_git_timestamps
    if isinstance(timestamp_globs, bool):
        timestamp_globs = None

    exclude = context.exclude
    include = context.include

    for filename in all_files:
        # Index paths are bytes; decode once for path arithmetic and globbing
        relpath = os.path.relpath(
            os.path.join(root_folder, filename.decode('utf-8')), parent_dir)

        # Only include files under the parent_dir
        if relpath.startswith("../"):
            continue

        # Ignore files that we don't want git_timestamps from
        if timestamp_globs is not None and not any(
                fnmatch.fnmatch(relpath, line) for line in timestamp_globs):
            continue

        # Matched is true by default if
        # * Have exclude
        # * No exclude and no include
        matched = bool(exclude) or not include

        # Anything not matching exclude gets included
        if exclude and any(fnmatch.fnmatch(relpath, line) for line in exclude):
            matched = False

        # Anything matching include gets included
        if include and any(fnmatch.fnmatch(relpath, line) for line in include):
            matched = True

        # Either didn't match any exclude or matched an include
        if matched:
            use_files.add(filename)
            use_files_relpaths.add(relpath)

    if not silent_build:
        log.info("Finding modified times for %s/%s git controlled files in %s",
                 len(use_files), len(all_files), root_folder)

    first_commit = None
    cached_commit, cached_mtimes = self.get_cached_mtimes(
        root_folder, use_files_relpaths)

    for entry in git.get_walker():
        commit_id = entry.commit.id.decode('utf-8')
        if first_commit is None:
            first_commit = commit_id

        if cached_commit and commit_id == cached_commit:
            # Everything older is already cached; times found in newer
            # commits take precedence over the cached ones.
            new_mtimes = cached_mtimes
            new_mtimes.update(mtimes)
            mtimes = new_mtimes
            break

        date = entry.commit.author_time
        added = False
        for changes in entry.changes():
            # A walker entry yields either one change or a list of them
            if not isinstance(changes, list):
                changes = [changes]
            for change in changes:
                path = change.new.path
                if not (root_folder and path and parent_dir):
                    continue
                if path not in use_files:
                    continue
                relpath = os.path.relpath(
                    os.path.join(root_folder, path.decode('utf-8')),
                    parent_dir)
                if relpath.startswith("../"):
                    continue
                # Keys stay bytes while walking; normalised to text below
                key = relpath.encode('utf-8')
                if mtimes.get(key, 0) < date:
                    mtimes[key] = date
                    added = True

        # Stop walking history once every wanted file has a timestamp
        if added and use_files.issubset(mtimes):
            break

    # Normalise every key to text (cached keys may already be text)
    mtimes = {
        (fn.decode('utf-8') if hasattr(fn, "decode") else fn): mtime
        for fn, mtime in mtimes.items()
    }

    if first_commit != cached_commit:
        self.set_cached_mtimes(root_folder, first_commit, mtimes,
                               use_files_relpaths)

    return mtimes
class Efesto:
    """WSGI application that serves pages looked up in a git repository index.

    A request for ``/some/page`` is served from ``some/page.html`` or
    ``some/page.rst`` when one of them is present in the index. The page is
    wrapped in header and footer templates which are searched for in the
    page's directory and then in each parent directory up to the root.

    Request state (``start_response``, ``env``, ``git_index``, ``page``) is
    stored on the instance by ``__call__``.
    """

    def __init__(self, path='.', prefix='', suffix='', header='header.html', footer='footer.html', notfound='notfound.html'):
        """
        :param path: location of the git repository
        :param prefix: markup emitted before a rendered rst body
        :param suffix: markup emitted after a rendered rst body
        :param header: file name of the header template
        :param footer: file name of the footer template
        :param notfound: file name of the template used for 404 responses
        """
        self.repo = Repo(path)
        self.prefix = prefix
        self.suffix = suffix
        self.header = header
        self.footer = footer
        self.notfound = notfound
        # Maps a file extension to the method rendering that kind of page
        self.allowed_ext = {'html': self.render_html, 'rst': self.render_rst}

    def __call__(self, environ, start_response):
        """WSGI entry point: render the page named by ``PATH_INFO``."""
        self.start_response = start_response
        self.env = environ
        # Re-open the index on every request
        self.git_index = self.repo.open_index()
        requested_item = environ['PATH_INFO'][1:].rstrip('/')
        if requested_item == '':
            requested_item = 'index'
        self.page = requested_item
        return self.render_page()

    def render_page(self):
        """Render the requested page with the first matching extension, or a 404."""
        for ext in self.allowed_ext.keys():
            item = "%s.%s" % (self.page, ext)
            if item in self.git_index:
                return self.allowed_ext[ext](item)
        return self.render_notfound()

    def render_html(self, item):
        """Return ``[header, body, footer]`` for the html file ``item``."""
        self.start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        # Element 8 of an index entry is used as the blob sha
        sha = self.git_index[item][8]
        body = self.apply_vars(self.repo[sha].as_raw_string())
        return [header, body, footer]

    def render_rst(self, item):
        """Return ``[header, prefix, body, suffix, footer]`` for the rst file ``item``.

        The rst source is converted with ``publish_parts`` and the resulting
        ``html_body`` is encoded as utf8.
        """
        self.start_response('200 OK', [('Content-Type', 'text/html; charset=utf-8')])
        sha = self.git_index[item][8]
        blob = self.repo[sha]
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        parts = publish_parts(self.apply_vars(blob.as_raw_string()), writer_name='html')
        body = unicode(parts['html_body']).encode('utf8')
        return [header, self.prefix, body, self.suffix, footer]

    def render_notfound(self):
        """Return a 404 response, using the notfound template when one exists."""
        self.start_response('404 Not Found', [('Content-Type', 'text/html; charset=utf-8')])
        header = self.get_html_template(self.header)
        footer = self.get_html_template(self.footer)
        body = self.get_html_template(self.notfound)
        if body == '':
            body = '<h1>Not Found</h1>'
        return [header, body, footer]

    def apply_vars(self, body):
        """Replace every ``|KEY|`` marker in ``body`` with ``str(environ[KEY])``."""
        # NOTE(review): values are inserted without any HTML escaping and the
        # environ contains client supplied data (query string, headers).
        # Confirm templates only reference trusted keys.
        for env in self.env.keys():
            body = body.replace("|%s|" % env, str(self.env[env]))
        return body

    def get_html_template(self, html):
        """Find the template ``html`` closest to the requested page.

        Looks in the page's directory first and then in each parent directory
        up to the repository root.

        :return: the template with variables applied, or ``''`` when no
                 template was found
        """
        path = self.page
        while True:
            current_path = path
            # Drop the last path component to move one directory up
            path = '/'.join(path.split('/')[:-1])
            if current_path == path:
                break
            # Index paths carry no leading slash. Only strip slashes rather
            # than blindly dropping the first character, which used to
            # corrupt the directory name for pages below the root
            # (docs/page looked up "ocs/header.html").
            item = ("%s/%s" % (path, html)).lstrip('/')
            if item in self.git_index:
                sha = self.git_index[item][8]
                return self.apply_vars(self.repo[sha].as_raw_string())
        return ''
class Gittle(object):
    """High level wrapper around a dulwich repository.

    All paths used in Gittle external methods must be paths relative to the
    git repository
    """

    DEFAULT_COMMIT = "HEAD"
    DEFAULT_BRANCH = "master"
    DEFAULT_REMOTE = "origin"
    DEFAULT_MESSAGE = "**No Message**"
    DEFAULT_USER_INFO = {"name": None, "email": None}

    DIFF_FUNCTIONS = {
        "classic": utils.git.classic_tree_diff,
        "dict": utils.git.dict_tree_diff,
        "changes": utils.git.dict_tree_diff,
    }
    DEFAULT_DIFF_TYPE = "dict"

    HIDDEN_REGEXES = [
        # Hide git directory
        r".*\/\.git\/.*"
    ]

    # References
    REFS_BRANCHES = "refs/heads/"
    REFS_REMOTES = "refs/remotes/"
    REFS_TAGS = "refs/tags/"

    # Name pattern truths
    # Used for detecting if files are :
    #   - deleted
    #   - added
    #   - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    # Used to tell if a tree entry is a directory
    # (0o prefix is accepted by Python 2.6+ and Python 3, unlike a bare leading 0)
    MODE_DIRECTORY = 0o040000

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap an existing repository.

        ``repo_or_path`` may be a dulwich repository, another Gittle instance
        or a string path. When ``auth`` is not given, the remaining positional
        and keyword arguments are used to build a GittleAuth.
        """
        if isinstance(repo_or_path, DulwichRepo):
            self.repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            self.repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            path = os.path.abspath(repo_or_path)
            self.repo = DulwichRepo(path)
        else:
            logging.warning("Repo is of type %s" % type(repo_or_path))
            raise Exception("Gittle must be initialized with either a dulwich repository or a string to the path")

        # Set path
        self.path = self.repo.path

        # The remote url
        self.origin_uri = origin_uri

        # Report client activity
        self._report_activity = report_activity

        # Build ignore filter
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Get authenticator
        if auth:
            self.authenticator = auth
        else:
            self.auth(*args, **kwargs)

    def report_activity(self, *args, **kwargs):
        """Forward to the activity callback given at construction, if any."""
        if not self._report_activity:
            return
        return self._report_activity(*args, **kwargs)

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        """Return ``"name <email>"`` or None when either part is missing."""
        name = userinfo.get("name")
        email = userinfo.get("email")
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return "".join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """Return SHA of the current HEAD
        """
        return self.repo.head()

    @property
    def is_bare(self):
        """Bare repositories have no working directories or indexes
        """
        return self.repo.bare

    @property
    def is_working(self):
        return not (self.is_bare)

    def has_index(self):
        """Opposite of is_bare
        """
        return self.repo.has_index()

    @property
    def has_commits(self):
        """
        If the repository has no HEAD we consider that is has no commits
        """
        try:
            self.repo.head()
        except KeyError:
            return False
        return True

    def ref_walker(self, ref=None):
        """
        Very simple, basic walker
        """
        ref = ref or "HEAD"
        sha = self._commit_sha(ref)
        return self.repo.revision_history(sha)

    def branch_walker(self, branch):
        branch = branch or self.DEFAULT_BRANCH
        ref = self._format_ref_branch(branch)
        return self.ref_walker(ref)

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list of commits with all their attached information

        The ``start``/``end`` slice is only applied when ``end`` is truthy.
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        if not end:
            return commits
        return commits[start:end]

    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        n = n or 10
        return funky.pluck(self.commit_info(end=n, branch=branch), "author")

    @property
    def commit_count(self):
        try:
            return len(self.ref_walker())
        except KeyError:
            return 0

    def commits(self):
        """Return a list of SHAs for all the concerned commits
        """
        return [commit["sha"] for commit in self.commit_info()]

    @property
    def git_dir(self):
        return self.repo.controldir()

    def auth(self, *args, **kwargs):
        """Build, store and return a GittleAuth from the given arguments."""
        self.authenticator = GittleAuth(*args, **kwargs)
        return self.authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.DEFAULT_BRANCH
        refs_key = self._format_ref_branch(branch_name)
        sha = self.branches[branch_name]

        def wants_func(old):
            return {refs_key: sha}

        return wants_func

    def _get_ignore_regexes(self):
        """Return regexes built from the repository's top level .gitignore."""
        gitignore_filename = os.path.join(self.path, ".gitignore")
        if not os.path.exists(gitignore_filename):
            return []
        # Close the file deterministically instead of leaking the handle
        with open(gitignore_filename) as gitignore_file:
            lines = gitignore_file.readlines()
        globers = [line.rstrip() for line in lines]
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        return os.path.abspath(os.path.join(self.path, repo_file))

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        return os.path.relpath(abspath, self.path)

    @property
    def last_commit(self):
        return self[self.repo.head()]

    @property
    def index(self):
        # Opens the index again on every access, so hold on to the returned
        # object when it has to be modified and then written.
        return self.repo.open_index()

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository"""
        mkdir_safe(path)

        # Constructor to use
        if bare:
            constructor = DulwichRepo.init_bare
        else:
            constructor = DulwichRepo.init

        # Create dulwich repo
        repo = constructor(path)

        # Create Gittle repo
        return cls(repo, *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        kwargs.setdefault("bare", True)
        return cls.init(*args, **kwargs)

    def get_client(self, origin_uri=None, **kwargs):
        """Return ``(client, remote_path)`` for the given or default remote url.

        Raises InvalidRemoteUrl when no url is available.
        """
        # Get the remote URL
        origin_uri = origin_uri or self.origin_uri

        # Fail if inexistant
        if not origin_uri:
            raise InvalidRemoteUrl()

        client_kwargs = {}
        auth_kwargs = self.authenticator.kwargs()

        client_kwargs.update(auth_kwargs)
        client_kwargs.update(kwargs)
        client_kwargs.update({"report_activity": self.report_activity})

        client, remote_path = get_transport_and_path(origin_uri, **client_kwargs)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None, progress_stderr=None):
        selector = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri, progress_stderr=progress_stderr)
        return client.send_pack(remote_path, selector, self.repo.object_store.generate_pack_contents, progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None, progress_stderr=None):
        return self.push_to(origin_uri, branch_name, progress, progress_stderr)

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        # Remove all previously existing data
        rmtree(self.path)
        mkdir_safe(self.path)
        self.repo = DulwichRepo.init(self.path)

        # Fetch brand new copy from remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        return self.fetch(origin_uri)

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        return self.pull_from(origin_uri, branch_name)

    def fetch_remote(self, origin_uri=None):
        # Get client
        client, remote_path = self.get_client(origin_uri=origin_uri)

        # Fetch data from remote repository
        remote_refs = client.fetch(remote_path, self.repo)

        return remote_refs

    def _setup_fetched_refs(self, refs, origin, bare):
        remote_tags = utils.git.subrefs(refs, "refs/tags")
        remote_heads = utils.git.subrefs(refs, "refs/heads")

        # Filter refs
        clean_remote_tags = utils.git.clean_refs(remote_tags)
        clean_remote_heads = utils.git.clean_refs(remote_heads)

        # Base of new refs
        heads_base = "refs/remotes/" + origin
        if bare:
            heads_base = "refs/heads"

        # Import branches
        self.import_refs(heads_base, clean_remote_heads)

        # Import tags
        self.import_refs("refs/tags", clean_remote_tags)

        # Update HEAD
        self["HEAD"] = refs["HEAD"]

    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from the remote, import its refs and check out when not bare."""
        bare = bare or False
        origin = origin or self.DEFAULT_REMOTE

        # Remote refs
        remote_refs = self.fetch_remote(origin_uri)

        # Update refs (branches, tags, HEAD)
        self._setup_fetched_refs(remote_refs, origin, bare)

        # Checkout working directories
        if not bare:
            self.checkout_all()
        else:
            self.update_server_info()

    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository"""
        mkdir_safe(local_path)

        # Initialize the local repository
        if bare:
            local_repo = cls.init_bare(local_path)
        else:
            local_repo = cls.init(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)

        repo.fetch(bare=bare)

        # Add origin
        # TODO

        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as .clone except clones to a bare repository by default
        """
        kwargs.setdefault("bare", True)
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):
        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.warning("STAGING : %s" % modified_files)
            self.add(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message, author=author_msg, committer=committer_msg, encoding="UTF-8", tree=tree, *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat list of ``{"name", "data", "mode"}`` dicts as a tree.

        Returns the id of the stored tree. Entries that cannot be encoded as
        ascii, or lack one of the keys, are skipped.
        """
        # TODO : Support directories
        tree = Tree()

        for file_info in structure:
            # str only
            try:
                data = file_info["data"].encode("ascii")
                name = file_info["name"].encode("ascii")
                mode = file_info["mode"]
            except Exception:
                # Skip file on encoding errors
                # (no bare except: don't swallow KeyboardInterrupt/SystemExit)
                continue

            blob = Blob()

            blob.data = data

            # Store file's contents
            self.repo.object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        self.repo.object_store.add_object(tree)

        return tree.id

    # Like: git commmit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        user_info = {"name": name, "email": email}
        return self._commit(committer=user_info, author=user_info, message=message, files=files, *args, **kwargs)

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Main use is to do commits directly to bare repositories
        For example doing a first Initial Commit so the repo can be cloned and worked on right away
        """
        if not structure:
            return
        tree = self._tree_from_structure(structure)

        user_info = {"name": name, "email": email}

        return self._commit(committer=user_info, author=user_info, message=message, tree=tree, *args, **kwargs)

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        self.push(origin_uri)
        return self.pull(origin_uri)

    def lookup_entry(self, relpath, trackable_files=set()):
        """Return ``(blob sha hexdigest, st_mode)`` for a working tree file.

        Raises KeyError when ``relpath`` is not in ``trackable_files``.
        """
        if not relpath in trackable_files:
            raise KeyError

        abspath = self.abspath(relpath)

        with open(abspath, "rb") as git_file:
            data = git_file.read()
            s = sha1()
            s.update("blob %u\0" % len(data))
            s.update(data)
        return (s.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        return list(self.index)

    @property
    @funky.transform(set)
    def raw_files(self):
        return utils.paths.subpaths(self.path)

    @property
    @funky.transform(set)
    def ignored_files(self):
        return utils.paths.subpaths(self.path, filters=self.filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        return self.raw_files - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        return self.trackable_files - self.tracked_files

    # Disabled implementation kept for reference:
    #
    # @property
    # @funky.transform(set)
    # def modified_staged_files(self):
    #     "Checks if the file has changed since last commit"
    #     timestamp = self.last_commit.commit_time
    #     index = self.index
    #     return [
    #         f
    #         for f in self.tracked_files
    #         if index[f][1][0] > timestamp
    #     ]

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        obj_sto = self.repo.object_store
        tree_id = self[ref].tree
        names = self.trackable_files

        lookup_func = partial(self.lookup_entry, trackable_files=names)

        # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False)
        return list(tree_diff)

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        changed_entries = self._changed_entries()
        filtered_paths = [
            funky.first_true(names)
            for names, modes, sha in changed_entries
            if tuple(map(bool, names)) == pattern and funky.first_true(names)
        ]

        return filtered_paths

    @property
    @funky.transform(set)
    def removed_files(self):
        return self._changed_entries_by_pattern(self.PATTERN_REMOVED) - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        return self._changed_entries_by_pattern(self.PATTERN_ADDED) - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        modified_files = self._changed_entries_by_pattern(self.PATTERN_MODIFIED) - self.ignored_files
        return modified_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        timestamp = self.last_commit.commit_time
        return [f for f in self.tracked_files if os.stat(self.abspath(f)).st_mtime > timestamp]

    @property
    def pending_files(self):
        """
        Returns a list of all files that could be possibly staged
        """
        # Union of both
        return self.modified_files | self.added_files | self.removed_files

    @property
    def pending_files_by_state(self):
        files = {"modified": self.modified_files, "added": self.added_files, "removed": self.removed_files}

        # "Flip" the dictionary
        return {path: state for state, paths in files.items() for path in paths}

    # Disabled implementation kept for reference:
    #
    # @property
    # @funky.transform(set)
    # def modified_files(self):
    #     return self.modified_staged_files | self.modified_unstaged_files

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        return self.repo.stage(files)

    def add(self, *args, **kwargs):
        return self.stage(*args, **kwargs)

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Remove ``files`` from the index and write the index.

        Files that are not in the index are ignored. ``force`` is currently
        unused.
        """
        # self.index opens a new index object on every access, so the
        # deletions must happen on the very object that gets written.
        index = self.index
        index_files = [f for f in files if f in index]
        for f in index_files:
            del index[f]
        return index.write()

    def mv_fs(self, file_pair):
        old_name, new_name = file_pair
        os.rename(old_name, new_name)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Rename ``(old, new)`` pairs on disk and update the index accordingly.

        Only pairs whose old name is in the index are processed.
        """
        index = self.index
        files_in_index = [pair for pair in files_pair if pair[0] in index]
        for pair in files_in_index:
            self.mv_fs(pair)
        old_files = map(funky.first, files_in_index)
        new_files = map(funky.last, files_in_index)
        self.add(new_files)
        self.rm(old_files)
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        return build_index_from_tree(self.repo.path, self.repo.index_path(), self.repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        commit_sha = commit_sha or self.head
        commit_tree = self._commit_tree(commit_sha)
        # Rebuild index from the current tree
        return self._checkout_tree(commit_tree)

    def checkout(self, commit_sha=None, files=None):
        """Checkout only a select amount of files

        Not implemented yet: currently just returns self.
        """
        commit_sha = commit_sha or self.head
        files = files or []

        return self

    @funky.arglist_method
    def reset(self, files, commit="HEAD"):
        # Not implemented
        pass

    def rm_all(self):
        """Remove every entry from the index and write the index."""
        # Clear and write the same index object, see rm
        index = self.index
        index.clear()
        return index.write()

    def _to_commit(self, commit_obj):
        """Allows methods to accept both SHA's or dulwich Commit objects as arguments
        """
        if isinstance(commit_obj, basestring):
            return self.repo[commit_obj]
        return commit_obj

    def _commit_sha(self, commit_obj):
        """Extracts a Dulwich commits SHA
        """
        if utils.git.is_sha(commit_obj):
            return commit_obj
        elif isinstance(commit_obj, basestring):
            # Can't use self[commit_obj] to avoid infinite recursion
            commit_obj = self.repo[commit_obj]
        return commit_obj.id

    def _blob_data(self, sha):
        """Return a blobs content for a given SHA
        """
        return self[sha].data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """ Recursively gets the nth parent for a given commit
            Warning: Remember that parents aren't the previous commits
        """
        if n is None:
            n = 1
        commit = self._to_commit(commit)
        parents = commit.parents

        if n <= 0 or not parents:
            # Return a SHA
            return self._commit_sha(commit)

        parent_sha = parents[0]
        parent = self[parent_sha]

        # Recur
        return self.get_parent_commit(parent, n - 1)

    def get_previous_commit(self, commit_ref, n=None):
        commit_sha = self._parse_reference(commit_ref)
        n = n or 1
        commits = self.commits()
        return funky.next(commits, commit_sha, n=n, default=commit_sha)

    def _parse_reference(self, ref_string):
        # COMMIT_REF~x
        if "~" in ref_string:
            ref, count = ref_string.split("~")
            count = int(count)
            commit_sha = self._commit_sha(ref)
            return self.get_previous_commit(commit_sha, count)
        return self._commit_sha(ref_string)

    def _commit_tree(self, commit_sha):
        """Return the tree object for a given commit
        """
        return self[commit_sha].tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff ``commit_sha`` against ``compare_to`` (default: the previous commit)."""
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary, it used to be accepted and then dropped
        return self._diff_between(compare_to, commit_sha, diff_function=diff_func, filter_binary=filter_binary)

    def diff_working(self, ref=None, filter_binary=True):
        """Diff between the current working directory and the HEAD
        """
        return utils.git.diff_changes_paths(
            self.repo.object_store, self.path, self._changed_entries(ref=ref), filter_binary=filter_binary
        )

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Returns a dict of the following Format :
            {
                "directory/filename.txt": {
                    'name': 'filename.txt',
                    'path': "directory/filename.txt",
                    "sha": "xxxxxxxxxxxxxxxxxxxx",
                    "data": "blablabla",
                    "mode": 0xxxxx",
                },
                ...
            }
        """
        # Default values
        context = {}
        is_tree = is_tree or False
        parent_path = parent_path or ""

        if is_tree:
            tree = self[commit_sha]
        else:
            tree = self[self._commit_tree(commit_sha)]

        for mode, path, sha in tree.entries():
            # Check if entry is a directory
            if mode == self.MODE_DIRECTORY:
                context.update(
                    self.get_commit_files(sha, parent_path=os.path.join(parent_path, path), is_tree=True, paths=paths)
                )
                continue

            subpath = os.path.join(parent_path, path)

            # Only add the files we want
            if not (paths is None or subpath in paths):
                continue

            # Add file entry
            context[subpath] = {"name": path, "path": subpath, "mode": mode, "sha": sha, "data": self._blob_data(sha)}
        return context

    def file_versions(self, path):
        """Returns all commits where given file was modified
        """
        versions = []
        commits_info = self.commit_info()
        seen_shas = set()

        for commit in commits_info:
            try:
                files = self.get_commit_files(commit["sha"], paths=[path])
                file_path, file_data = files.items()[0]
            except IndexError:
                continue

            file_sha = file_data["sha"]

            if file_sha in seen_shas:
                continue
            else:
                seen_shas.add(file_sha)

            # Add file info
            commit["file"] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits
            Please use .diff method unless you have very specific needs
        """

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """ List of changes between two SHAs
            Returns a list of lists of tuples :
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        kwargs["diff_type"] = "changes"
        return self.diff(*args, **kwargs)

    def changes_count(self, *args, **kwargs):
        return len(self.changes(*args, **kwargs))

    def _refs_by_pattern(self, pattern):
        """Return refs starting with ``pattern``, keyed without that prefix."""
        refs = self.refs

        def item_filter(key_value):
            """Filter only concerned refs"""
            key, value = key_value
            return key.startswith(pattern)

        def item_map(key_value):
            """Rewrite keys"""
            key, value = key_value
            new_key = key[len(pattern):]
            return (new_key, value)

        return dict(map(item_map, filter(item_filter, refs.items())))

    @property
    def refs(self):
        return self.repo.get_refs()

    def set_refs(self, refs_dict):
        """Assign every ``name: value`` pair of ``refs_dict`` on the repository."""
        # "self" was missing from the signature, so this could never be
        # called successfully as a method.
        for k, v in refs_dict.items():
            self.repo[k] = v

    def import_refs(self, base, other):
        return self.repo.refs.import_refs(base, other)

    @property
    def branches(self):
        return self._refs_by_pattern(self.REFS_BRANCHES)

    def _active_branch(self, refs=None, head=None):
        """Return ``(branch, sha)`` of a branch pointing at head, else ``(None, None)``."""
        head = head or self.head
        refs = refs or self.branches
        try:
            return {branch: branch_head for branch, branch_head in refs.items() if branch_head == head}.items()[0]
        except IndexError:
            pass
        return (None, None)

    @property
    def active_branch(self):
        return self._active_branch()[0]

    @property
    def active_sha(self):
        return self._active_branch()[1]

    @property
    def remote_branches(self):
        return self._refs_by_pattern(self.REFS_REMOTES)

    @property
    def tags(self):
        return self._refs_by_pattern(self.REFS_TAGS)

    @property
    def remotes(self):
        """ Dict of remotes
        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        """
        config = self.repo.get_config()
        return {keys[1]: values["url"] for keys, values in config.items() if keys[0] == "remote"}

    def add_ref(self, new_ref, old_ref):
        self.repo.refs[new_ref] = self.repo.refs[old_ref]
        self.update_server_info()

    def remove_ref(self, ref_name):
        # Returns False if ref doesn't exist
        if not ref_name in self.repo.refs:
            return False
        del self.repo.refs[ref_name]
        self.update_server_info()
        return True

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Try creating a new branch which tracks the given remote
            if such a branch does not exist then branch off a local branch

            Returns the full ref name of the new branch. Raises Exception
            when the new branch already exists or the base cannot be found.
        """

        # The remote to track
        # (honour the argument, it used to be overwritten unconditionally)
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Get information about remote_branch
        # Ref names always use "/" whatever the platform's path separator is
        remote_branch = "/".join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception(
                "Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)
            )

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def remove_branch(self, branch_name):
        ref = self._format_ref_branch(branch_name)
        return self.remove_ref(ref)

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Changes the current branch

        A missing branch is always created; ``create`` does not currently
        affect that.
        """
        if create is None:
            create = True

        # Check if branch exists
        if not branch_name in self.branches:
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Get branch reference
        branch_ref = self._format_ref_branch(branch_name)

        # Change main branch
        self.repo.refs.set_symbolic_ref("HEAD", branch_ref)

        if self.is_working:
            # Remove all files
            self.clean_working()

            # Add files for the current branch
            self.checkout_all()

    def clean(self, force=None, directories=None):
        """Delete the untracked files and return them.

        ``force`` and ``directories`` are currently unused.
        """
        untracked_files = self.untracked_files
        for untracked_file in untracked_files:
            os.remove(untracked_file)
        return untracked_files

    def clean_working(self):
        """Purges all the working (removes everything except .git)
            used by checkout_all to get clean branch switching
        """
        return self.clean()

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        """Return a nested dict describing the tree, down to ``depth`` levels.

        Directories map to nested dicts, other entries to their sha. The
        "." and ".." keys hold the tree's own sha and its parent's sha.
        """
        tree = self[tree_sha]
        structure = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return structure
        for mode, path, sha in tree.entries():
            # tree
            if mode == self.MODE_DIRECTORY:
                # Recur
                structure[path] = self._get_fs_structure(sha, depth=depth - 1, parent_sha=tree_sha)
            # commit
            else:
                structure[path] = sha
        structure["."] = tree_sha
        structure[".."] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        parts = path.split(os.path.sep)
        depth = len(parts) + 1
        structure = self._get_fs_structure(tree_sha, depth=depth)

        return funky.subkey(structure, parts)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" for a given commit
            using the tree of that commit
        """
        tree_sha = self._commit_tree(ref)

        # Root path
        if subpath in self.ROOT_PATHS or not subpath:
            return self._get_fs_structure(tree_sha, depth=1)
        # Any other path
        return self._get_fs_structure_by_path(tree_sha, subpath)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit
        """
        name, info = self.get_commit_files(ref, paths=[path]).items()[0]
        return info

    def commit_tree(self, ref, *args, **kwargs):
        tree_sha = self._commit_tree(ref)
        return self._get_fs_structure(tree_sha, *args, **kwargs)

    def update_server_info(self):
        # Only done for bare repositories
        if not self.is_bare:
            return
        update_server_info(self.repo)

    def _is_fast_forward(self):
        pass

    def _merge_fast_forward(self):
        pass

    def __hash__(self):
        """This is required otherwise the memoize function will just mess it up
        """
        return hash(self.path)

    def __getitem__(self, key):
        sha = self._parse_reference(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        self.repo[key] = value

    # Alias to clone_bare
    fork = clone_bare
    log = commit_info
    diff_count = changes_count
    # Misspelled alias kept so existing callers keep working
    comtributors = recent_contributors
    contributors = recent_contributors
class Wiki(HookMixin):
    """A wiki stored in a git repository.

    The repository at ``path`` is opened, or initialised when ``path`` is not
    a git repository yet.
    """

    path = None
    base_path = '/'
    default_ref = 'master'
    default_committer_name = 'Anon'
    default_committer_email = '*****@*****.**'
    index_page = 'home'
    repo = None

    def __init__(self, path):
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            self.repo = Repo.init(path, mkdir=True)

        self.path = path

    def __repr__(self):
        return "Wiki: %s" % self.path

    def commit(self, name, email, message, files):
        """Commit to the underlying git repo.

        :param name: Committer name
        :param email: Committer email
        :param message: Commit message
        :param files: list of file names that will be staged for commit
        :return: whatever the repository's ``do_commit`` returns
        """
        print("commit")
        if isinstance(name, unicode):
            name = name.encode('utf-8')
        if isinstance(email, unicode):
            email = email.encode('utf-8')
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        # The same identity is used as author and committer
        author = committer = "%s <%s>" % (name, email)
        self.repo.stage(files)
        return self.repo.do_commit(message=message,
                                   committer=committer,
                                   author=author)

    def push(self, commit, approved_by):
        """Publish a commit to ``master`` on ``origin`` using the git command line.

        Runs ``git pull``, ``git rebase -i <commit>`` and
        ``git push origin <commit>:master`` inside the wiki's repository
        directory and prints the output of each command.

        :param commit: Commit sha (or ref) to publish
        :param approved_by: Currently unused
        :raises subprocess.CalledProcessError: when a git command fails
        """
        # Local import: os is not otherwise needed by this class
        import os

        print(commit)

        # The interactive rebase would open an editor and wait for a human.
        # Using "true" as the sequence editor accepts the proposed todo list
        # unchanged, which is what answering the editor with ":x" was meant
        # to achieve.
        env = dict(os.environ, GIT_SEQUENCE_EDITOR="true")

        # Every argument is its own list element: "rebase -i <commit>" as a
        # single element would be looked up as one (unknown) git subcommand.
        commands = (
            ["git", "pull"],
            ["git", "rebase", "-i", commit],
            ["git", "push", "origin", commit + ":master"],
        )
        for argv in commands:
            output = subprocess.check_output(argv, cwd=self.path, env=env)
            print(output)

    def get_page(self, name, sha='HEAD'):
        """Get page data, partials, commit info.

        :param name: Name of page.
        :param sha: Commit sha.
        :return: WikiPage
        """
        return WikiPage(name, self, sha=sha)

    def get_index(self):
        """Get repo index of head.

        :return: list -- List of dicts
        """
        rv = []
        index = self.repo.open_index()
        for name in index:
            entry = index[name]
            rv.append(
                dict(name=filename_to_cname(name),
                     filename=name,
                     # ctime and mtime are indexable; the first item is used
                     ctime=entry.ctime[0],
                     mtime=entry.mtime[0],
                     sha=entry.sha,
                     size=entry.size))

        return rv