def _traverse(tree, repo, blobs, prefix):
    '''
    Traverse through a dulwich Tree object recursively, accumulating all the
    empty directories within it in the "blobs" list

    NOTE(review): despite the docstring, every directory entry is appended,
    not only empty ones; the length check merely stops recursion into empty
    subtrees. Confirm the intended semantics against the caller.
    '''
    for item in tree.items():
        obj = repo.get_object(item.sha)
        # Only directory (Tree) entries matter here; skip blobs and others
        if not isinstance(obj, dulwich.objects.Tree):
            continue
        blobs.append(os.path.join(prefix, item.path))
        # Reuse the already-fetched object instead of hitting the object
        # store a second time for the same SHA
        if len(obj):
            _traverse(obj, repo, blobs, os.path.join(prefix, item.path))
def _traverse(tree, repo, blobs, prefix):
    """
    Traverse through a dulwich Tree object recursively, accumulating all the
    empty directories within it in the "blobs" list

    NOTE(review): as written, every subdirectory path is collected (the
    ``len`` check only prevents descending into empty subtrees) — verify
    whether only empty directories were meant.
    """
    for item in tree.items():
        obj = repo.get_object(item.sha)
        # Non-Tree objects (blobs, submodule links) are not directories
        if not isinstance(obj, dulwich.objects.Tree):
            continue
        blobs.append(os.path.join(prefix, item.path))
        # ``obj`` is the same object as ``repo.get_object(item.sha)``; avoid
        # the redundant second fetch from the object store
        if len(obj):
            _traverse(obj, repo, blobs, os.path.join(prefix, item.path))
def get_changelog(repo_path, from_commit=None):
    """
    Given a repo path and an option commit/tag/refspec to start from, will
    get the rpm compatible changelog

    Args:
        repo_path (str): path to the git repo
        from_commit (str): refspec (partial commit hash, tag, branch, full
            refspec, partial refspec) to start the changelog from

    Returns:
        str: Rpm compatible changelog
    """
    repo = dulwich.repo.Repo(repo_path)
    tags = get_tags(repo)
    refs = get_refs(repo)
    changelog = []
    # Version counters are threaded through get_version() call after call so
    # the semantic version accumulates commit by commit.
    maj_version = 0
    feat_version = 0
    fix_version = 0
    start_including = False
    cur_line = ''
    # With no starting point, every commit goes into the changelog.
    if from_commit is None:
        start_including = True

    # Walk first-parent history oldest-first so version numbers grow in
    # chronological order (the result is re-reversed at the end).
    for commit_sha, children in reversed(
        get_children_per_first_parent(repo_path).items()
    ):
        commit = repo.get_object(commit_sha)
        maj_version, feat_version, fix_version = get_version(
            commit=commit,
            tags=tags,
            maj_version=maj_version,
            feat_version=feat_version,
            fix_version=fix_version,
        )
        version = '%s.%s.%s' % (maj_version, feat_version, fix_version)
        # Short-circuit order matters: when from_commit is None,
        # start_including is already True so startswith() is never called
        # with a None argument.
        if (
            start_including
            or commit_sha.startswith(from_commit)
            or fuzzy_matches_refs(from_commit, refs.get(commit_sha, []))
        ):
            cur_line = pretty_commit(
                commit,
                version,
            )
            # Commits merged in through this first-parent commit are shown
            # without their own version number.
            for child in children:
                cur_line += pretty_commit(repo.get_object(child), version=None)
            start_including = True
            changelog.append(cur_line)

    # Newest entry first, as rpm changelogs expect.
    return '\n'.join(reversed(changelog))
def _file_list_dulwich(repo, tgt_env, recursive=False):
    """Get file list using dulwich

    Walks the tree of ``tgt_env`` (assumes tgt_env exposes a ``.tree``
    attribute holding a tree SHA — confirm against callers) and returns a
    list of GitTreeElement records for blobs and trees found.
    """

    def _traverse(tree, repo_obj, blobs, prefix):
        """Traverse through a dulwich Tree object recursively, accumulating
        all the blob paths within it in the "blobs" list"""
        for item in list(tree.items()):
            try:
                obj = repo_obj.get_object(item.sha)
            except KeyError:
                # Skip "commit" objects which are links to submodules.
                continue
            elem = GitTreeElement()
            elem.sha = item.sha.decode("ascii")
            elem.path = os.path.join(prefix, item.path.decode("utf8"))
            if isinstance(obj, dulwich.objects.Blob):
                elem.type = "blob"
                blobs.append(elem)
            elif isinstance(obj, dulwich.objects.Tree):
                # Directories are reported as elements too, then optionally
                # descended into.
                elem.type = "tree"
                blobs.append(elem)
                # Check whether to fetch more than the most upper layer.
                if recursive:
                    _traverse(obj, repo_obj, blobs, elem.path)

    tree = repo.get_object(tgt_env.tree)
    # A non-Tree object here means the target has no listable contents.
    if not isinstance(tree, dulwich.objects.Tree):
        return []
    blobs = []
    if len(tree) > 0:
        _traverse(tree, repo, blobs, "")
    return blobs
def get_current_version(repo_path):
    """
    Given a repo will return the version string, according to semantic
    versioning, counting as non-backwards compatible commit any one with a
    message header that matches (case insensitive)::

        sem-ver: .*break.*

    And as features any commit with a header matching::

        sem-ver: feature

    And counting any other as a bugfix
    """
    repo = dulwich.repo.Repo(repo_path)
    tag_map = get_tags(repo)
    # Accumulate the three version counters while replaying first-parent
    # history oldest-first.
    major, feature, fix = 0, 0, 0
    for sha in reversed(get_first_parents(repo_path)):
        major, feature, fix = get_version(
            commit=repo.get_object(sha),
            tags=tag_map,
            maj_version=major,
            feat_version=feature,
            fix_version=fix,
        )
    return '%s.%s.%s' % (major, feature, fix)
def get_repo_object(repo, object_name):
    """
    Fetch an object from ``repo`` by name, accepting either str or bytes.

    Args:
        repo: dulwich-style repo exposing ``get_object``
        object_name: object identifier as ``str`` or ``bytes``

    Returns:
        The object returned by ``repo.get_object``.
    """
    try:
        object_name = object_name.encode()
    except AttributeError:
        # Already bytes (bytes has no .encode) — use as-is. A bare except
        # here previously hid every other error, including typos.
        pass
    return repo.get_object(object_name)
def get_repo_object(repo, object_name):
    """
    Look up ``object_name`` in ``repo``, tolerating both str and bytes names.

    Args:
        repo: dulwich-style repo exposing ``get_object``
        object_name: object identifier as ``str`` or ``bytes``

    Returns:
        The object returned by ``repo.get_object``.
    """
    try:
        object_name = object_name.encode()
    except AttributeError:
        # bytes input has no .encode; keep it unchanged. Narrowed from
        # ``except Exception`` so genuine errors are no longer swallowed.
        pass
    return repo.get_object(object_name)
def releases(git_dir):
    """
    Scan the tags of the repo at ``git_dir`` for beaker release tags and
    return Release records, newest minor version first.

    NOTE(review): ``repo.revision_history`` was removed from modern dulwich
    (replaced by ``Repo.get_walker``) — confirm the pinned dulwich version.
    Also, the dict comprehension keeps only ONE tag per commit; multiple
    tags on the same commit are silently dropped.
    """
    repo = dulwich.repo.Repo(git_dir)
    releases = []
    tags = [repo.get_object(repo.refs['refs/tags/%s' % tag])
            for tag in repo.refs.keys(base='refs/tags/')]
    # Annotated tags only; tag.object is (type, sha) -> map commit sha to tag
    tag_commits = dict((tag.object[1], tag)
                       for tag in tags if tag.type_name == 'tag')
    for commit in repo.revision_history(repo.refs['HEAD']):
        if commit.id not in tag_commits:
            continue
        tag = tag_commits[commit.id]
        m = re.match(r'beaker-([\d.]*)$', tag.name)
        if not m:
            # also check for tito tags, used up to 0.14.1
            m = re.match(r'beaker-([\d.]*)-1$', tag.name)
            if not m:
                continue
        # Tagger field looks like "Name <email>"
        name, email = re.match(r'(.*) <(.*)>', tag.tagger).groups()
        timestamp = datetime.datetime.fromtimestamp(
            tag.tag_time, tzoffset(None, tag.tag_timezone))
        releases.append(Release(version=m.group(1), timestamp=timestamp,
                                name=name, email=email, tag=tag.name))
    releases = sorted(releases, key=lambda r: r.timestamp, reverse=True)
    # skip anything prior to 0.10
    releases = list(takewhile(lambda r: r.version != '0.9.4', releases))
    return sorted(releases, key=lambda r: r.minor, reverse=True)
def get_commit_ids_and_messages(repo_path, limit=1):
    """Get up to limit commits in the given repo_path.

    Yields ``(commit_id, message, commit_time)`` tuples; ``limit=None``
    walks the whole history.

    >>> list(get_commit_ids_and_messages(".", limit=None))[-1] # the root commit of this repo
    (b'016add7c00f6da53ee3c36b227672b416419c972', b'TEST-123 initial commit of public version\\n', 1542271850)
    """
    repo = dulwich.repo.Repo(repo_path)
    walker = repo.get_graph_walker()
    n = 0
    while limit is None or n < limit:
        c = walker.next()
        if c is None:
            logging.debug("No more commits from walker %s", walker)
            break
        # Fetch the commit object once and reuse it for both fields
        # (the original fetched it twice per iteration).
        commit = repo.get_object(c)
        yield (c, commit.message, commit.commit_time)
        n += 1
def get_commit_ids_and_files(repo_path, limit=1, withsizes=False):
    """Get up to limit commits in the given repo_path.

    Yields ``(commit_id, changed_paths, sizes)`` per commit; sizes are 0
    unless ``withsizes`` is set. ``limit=None`` walks the whole history.

    >>> list(get_commit_ids_and_files(".", limit=None))[-1] # the root commit of this repo
    (b'016add7c00f6da53ee3c36b227672b416419c972', [b'README.org', b'correlate_files_per_issue.py', b'find_all_bugs.py', b'guix.scm', b'link_commits_to_issues.py', b'plot.py', b'retrieve_commits_and_issues.py', b'retrieve_repository_info.py'], [0, 0, 0, 0, 0, 0, 0, 0])
    """
    repo = dulwich.repo.Repo(repo_path)
    walker = repo.get_graph_walker()
    n = 0
    while limit is None or n < limit:
        c = walker.next()
        if c is None:
            break
        commit = repo.get_object(c)
        # Diff against the first parent; the root commit diffs against an
        # empty tree (None).
        if commit.parents:
            prev_tree = repo.get_object(commit.parents[0]).tree
        else:
            prev_tree = None
        files = []
        sizes = []
        delta = dulwich.diff_tree.tree_changes(repo, prev_tree, commit.tree,
                                               want_unchanged=False)
        for x in delta:
            # Deletions have no new path; skip them.
            if x.new.path is None:
                continue
            files.append(x.new.path)
            if not withsizes:
                sizes.append(0)
                continue
            try:
                obj = repo.get_object(x.new.sha)
                data = obj.data
                length = (len(data) if data is not None else 0)
            except Exception:
                # Best effort: a missing/odd object just counts as size 0.
                logging.error("Cannot get data from file %s with sha %s",
                              x.new.path, x.new.sha)
                length = 0
            sizes.append(length)
        logging.debug("filechange: %s: %s, %s", c, files, sizes)
        yield (c, files, sizes)
        n += 1
def _traverse(tree, repo, blobs, prefix):
    '''
    Recursively walk a dulwich Tree, appending the path of every blob
    found beneath it to the "blobs" list
    '''
    for entry in tree.items():
        child = repo.get_object(entry.sha)
        child_path = os.path.join(prefix, entry.path)
        if isinstance(child, dulwich.objects.Tree):
            # Descend into subdirectories
            _traverse(child, repo, blobs, child_path)
        elif isinstance(child, dulwich.objects.Blob):
            blobs.append(child_path)
def _traverse(tree, repo, blobs, prefix):
    """
    Walk a dulwich Tree depth-first, collecting the full path of each blob
    into the "blobs" list.
    """
    for node in tree.items():
        full_path = os.path.join(prefix, node.path)
        found = repo.get_object(node.sha)
        if isinstance(found, dulwich.objects.Blob):
            blobs.append(full_path)
            continue
        if isinstance(found, dulwich.objects.Tree):
            # Recurse into the subdirectory
            _traverse(found, repo, blobs, full_path)
def _dulwich_walk_tree(repo, tree, path): ''' Dulwich does not provide a means of directly accessing subdirectories. This function will walk down to the directory specified by 'path', and return a Tree object at that path. If path is an empty string, the original tree will be returned, and if there are any issues encountered walking the tree, None will be returned. ''' if not path: return tree # Walk down the tree to get to the file for parent in path.split(os.path.sep): try: tree = repo.get_object(tree[parent][1]) except (KeyError, TypeError): # Directory not found, or tree passed into function is not a Tree # object. Either way, desired path does not exist. return None return tree
def _dulwich_walk_tree(repo, tree, path): """ Dulwich does not provide a means of directly accessing subdirectories. This function will walk down to the directory specified by 'path', and return a Tree object at that path. If path is an empty string, the original tree will be returned, and if there are any issues encountered walking the tree, None will be returned. """ if not path: return tree # Walk down the tree to get to the file for parent in path.split(os.path.sep): try: tree = repo.get_object(tree[parent][1]) except (KeyError, TypeError): # Directory not found, or tree passed into function is not a Tree # object. Either way, desired path does not exist. return None return tree
def get_children_per_first_parent(repo_path):
    """
    Map every first-parent commit sha of the repo to the set of commit shas
    that were merged in through it. Non-merge commits map to an empty set.
    The mapping preserves first-parent order (OrderedDict).
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = get_first_parents(repo_path)
    children_per_parent = get_children_per_parent(repo_path)
    result = OrderedDict()
    for sha in first_parents:
        commit = repo.get_object(sha)
        # Only merge commits (more than one parent) pull in extra commits
        result[sha] = (
            get_merged_commits(
                repo=repo,
                commit=commit,
                first_parents=first_parents,
                children_per_parent=children_per_parent,
            )
            if len(commit.parents) > 1
            else set()
        )
    return result
def get_merged_commits(repo, commit, first_parents, children_per_parent):
    """
    Collect the shas of the commits brought into the history by the merge
    ``commit``, exploring backwards from the merge itself.

    Args:
        repo: dulwich repo used to resolve shas to commit objects
        commit: the merge commit to start from
        first_parents: shas on the first-parent (mainline) history
        children_per_parent: mapping used by has_firstparent_child

    Returns:
        set: shas of the merged-in commits
    """
    merge_children = set()
    to_explore = set([commit.sha().hexdigest()])
    while to_explore:
        next_sha = to_explore.pop()
        next_commit = repo.get_object(next_sha)
        # Precedence: (not-on-mainline AND no mainline child) OR is a direct
        # parent of the merge commit. The direct-parent clause wins
        # unconditionally.
        if (next_sha not in first_parents and not has_firstparent_child(
                next_sha, first_parents, children_per_parent) or
                next_sha in commit.parents):
            merge_children.add(next_sha)
        # Continue exploring only through parents that are off the mainline
        non_first_parents = (parent for parent in next_commit.parents
                             if parent not in first_parents)
        for child_sha in non_first_parents:
            if child_sha not in merge_children and child_sha != next_sha:
                to_explore.add(child_sha)
    return merge_children
def releases(git_dir):
    """
    Scan the tags of the repo at ``git_dir`` for beaker release tags and
    return Release records sorted by version, newest first. Anything prior
    to 0.9 is skipped.
    """
    repo = dulwich.repo.Repo(git_dir)
    releases = []
    tags = [
        repo.get_object(repo.refs['refs/tags/%s' % tag])
        for tag in repo.refs.keys(base='refs/tags/')
    ]
    # A commit may carry several annotated tags; keep them all, keyed by
    # the commit sha the tag points at (tag.object is (type, sha)).
    tag_commits = {}
    for tag in tags:
        if tag.type_name == 'tag':
            tag_commits.setdefault(tag.object[1], []).append(tag)
    for walker in repo.get_walker(repo.refs['HEAD']):
        if walker.commit.id not in tag_commits:
            continue
        # First tag on this commit that matches a release pattern wins
        for tag in tag_commits[walker.commit.id]:
            m = re.match(r'beaker-([\d.]*)$', tag.name)
            if m:
                break
            # also check for tito tags, used up to 0.14.1
            m = re.match(r'beaker-([\d.]*)-1$', tag.name)
            if m:
                break
        if not m:
            continue
        # Tagger field looks like "Name <email>"
        name, email = re.match(r'(.*) <(.*)>', tag.tagger).groups()
        timestamp = datetime.datetime.fromtimestamp(
            tag.tag_time, tzoffset(None, tag.tag_timezone))
        releases.append(
            Release(version=m.group(1), timestamp=timestamp,
                    name=name.decode('utf-8'), email=email, tag=tag.name))
    releases = sorted(releases, key=lambda r: r.timestamp, reverse=True)
    # skip anything prior to 0.9
    releases = list(takewhile(lambda r: r.version != '0.8.99', releases))
    return sorted(releases, key=lambda r: r.version_tuple, reverse=True)
def get_merged_commits(repo, commit, first_parents, children_per_parent):
    """
    Return the set of commit shas that were merged into the history by the
    merge ``commit``, found by exploring backwards from the merge.

    Args:
        repo: dulwich repo used to resolve shas to commit objects
        commit: the merge commit to start from
        first_parents: shas on the first-parent (mainline) history
        children_per_parent: mapping used by has_firstparent_child

    Returns:
        set: shas of the merged-in commits
    """
    merge_children = set()
    to_explore = set([commit.sha().hexdigest()])
    while to_explore:
        next_sha = to_explore.pop()
        next_commit = repo.get_object(next_sha)
        # Precedence here is ((A and not B) or C): off-mainline with no
        # mainline child, OR a direct parent of the merge commit.
        if (
            next_sha not in first_parents
            and not has_firstparent_child(
                next_sha, first_parents, children_per_parent
            )
            or next_sha in commit.parents
        ):
            merge_children.add(next_sha)
        # Only keep exploring through parents that are off the mainline
        non_first_parents = (
            parent
            for parent in next_commit.parents
            if parent not in first_parents
        )
        for child_sha in non_first_parents:
            if child_sha not in merge_children and child_sha != next_sha:
                to_explore.add(child_sha)
    return merge_children
def find_file(path, tgt_env='base', **kwargs):
    '''
    Find the first file to match the path and ref, read the file out of git
    and send the path to the newly cached file
    '''
    fnd = {'path': '', 'rel': ''}
    # Absolute paths are never served from gitfs
    if os.path.isabs(path):
        return fnd
    provider = _get_provider()
    base_branch = __opts__['gitfs_base']
    gitfs_root = __opts__['gitfs_root']
    gitfs_mountpoint = salt.utils.strip_proto(__opts__['gitfs_mountpoint'])
    if tgt_env == 'base':
        tgt_env = base_branch
    # Cache-file locations: the blob itself, its hash files, and a lock file
    dest = os.path.join(__opts__['cachedir'],
                        'gitfs/refs',
                        tgt_env,
                        path)
    hashes_glob = os.path.join(__opts__['cachedir'],
                               'gitfs/hash',
                               tgt_env,
                               '{0}.hash.*'.format(path))
    blobshadest = os.path.join(__opts__['cachedir'],
                               'gitfs/hash',
                               tgt_env,
                               '{0}.hash.blob_sha1'.format(path))
    lk_fn = os.path.join(__opts__['cachedir'],
                         'gitfs/hash',
                         tgt_env,
                         '{0}.lk'.format(path))
    destdir = os.path.dirname(dest)
    hashdir = os.path.dirname(blobshadest)
    if not os.path.isdir(destdir):
        os.makedirs(destdir)
    if not os.path.isdir(hashdir):
        os.makedirs(hashdir)
    # Check every configured remote until the path is found
    for repo_conf in init():
        repo = repo_conf['repo']
        # Per-remote root/mountpoint override the global settings
        root = repo_conf['root'] if repo_conf['root'] is not None \
            else gitfs_root
        mountpoint = repo_conf['mountpoint'] \
            if repo_conf['mountpoint'] is not None \
            else gitfs_mountpoint
        if mountpoint and not path.startswith(mountpoint + os.path.sep):
            continue
        # Translate the salt fileserver path into a path within the repo
        repo_path = path[len(mountpoint):].lstrip(os.path.sep)
        if root:
            repo_path = os.path.join(root, repo_path)
        # Resolve the blob and its sha with the configured git provider
        if provider == 'gitpython':
            tree = _get_tree_gitpython(repo, tgt_env)
            if not tree:
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                blob = tree / repo_path
            except KeyError:
                continue
            blob_hexsha = blob.hexsha
        elif provider == 'pygit2':
            tree = _get_tree_pygit2(repo, tgt_env)
            if not tree:
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                blob = repo[tree[repo_path].oid]
            except KeyError:
                continue
            blob_hexsha = blob.hex
        elif provider == 'dulwich':
            prefix_dirs, _, filename = repo_path.rpartition(os.path.sep)
            tree = _get_tree_dulwich(repo, tgt_env)
            tree = _dulwich_walk_tree(repo, tree, prefix_dirs)
            if not isinstance(tree, dulwich.objects.Tree):
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                # Referencing the path in the tree returns a tuple, the
                # second element of which is the object ID of the blob
                blob = repo.get_object(tree[filename][1])
            except KeyError:
                continue
            blob_hexsha = blob.sha().hexdigest()
        # Wait for any concurrent cache update to finish
        salt.fileserver.wait_lock(lk_fn, dest)
        # Cache hit: stored blob sha matches, serve the cached copy
        if os.path.isfile(blobshadest) and os.path.isfile(dest):
            with salt.utils.fopen(blobshadest, 'r') as fp_:
                sha = fp_.read()
                if sha == blob_hexsha:
                    fnd['rel'] = path
                    fnd['path'] = dest
                    return fnd
        # Cache miss: take the lock, drop stale hashes, rewrite the cache
        with salt.utils.fopen(lk_fn, 'w+') as fp_:
            fp_.write('')
        for filename in glob.glob(hashes_glob):
            try:
                os.remove(filename)
            except Exception:
                pass
        with salt.utils.fopen(dest, 'w+') as fp_:
            if provider == 'gitpython':
                blob.stream_data(fp_)
            elif provider == 'pygit2':
                fp_.write(blob.data)
            elif provider == 'dulwich':
                fp_.write(blob.as_raw_string())
        with salt.utils.fopen(blobshadest, 'w+') as fp_:
            fp_.write(blob_hexsha)
        try:
            os.remove(lk_fn)
        except (OSError, IOError):
            pass
        fnd['rel'] = path
        fnd['path'] = dest
        return fnd
    # Not found in any remote
    return fnd
def _get_tree_dulwich(repo, short):
    """
    Return a dulwich.objects.Tree object if the branch/tag/SHA is found,
    otherwise None
    """
    if short == __opts__["gitfs_base"] or short in envs():
        refs = repo.get_refs()
        # Sorting ensures we check heads (branches) before tags
        for ref in sorted(_dulwich_env_refs(refs)):
            # ref will be something like 'refs/heads/master'
            rtype, rspec = ref[5:].split("/", 1)
            rspec = rspec.replace("/", "_")
            if rspec == short and _env_is_exposed(rspec):
                if rtype == "heads":
                    commit = repo.get_object(refs[ref])
                elif rtype == "tags":
                    tag = repo.get_object(refs[ref])
                    if isinstance(tag, dulwich.objects.Tag):
                        # Tag.get_object() returns a 2-tuple, the 2nd element
                        # of which is the commit SHA to which the tag refers
                        commit = repo.get_object(tag.object[1])
                    elif isinstance(tag, dulwich.objects.Commit):
                        commit = tag
                    else:
                        log.error(
                            "Unhandled object type {0!r} in "
                            "_get_tree_dulwich. This is a bug, please report "
                            "it.".format(tag.type_name)
                        )
                return repo.get_object(commit.tree)

    # Branch or tag not matched, check if 'short' is a commit. This is more
    # difficult with Dulwich because of its inability to deal with shortened
    # SHA-1 hashes.
    if not _env_is_exposed(short):
        return None
    try:
        int(short, 16)
    except ValueError:
        # Not hexidecimal, likely just a non-matching environment
        return None

    try:
        if len(short) == 40:
            sha_obj = repo.get_object(short)
            if isinstance(sha_obj, dulwich.objects.Commit):
                sha_commit = sha_obj
        else:
            # Short SHA: scan the whole object store for unique prefix match
            matches = set(
                [
                    x
                    for x in (
                        repo.get_object(x)
                        for x in repo.object_store
                        if x.startswith(short)
                    )
                    if isinstance(x, dulwich.objects.Commit)
                ]
            )
            if len(matches) > 1:
                log.warning("Ambiguous commit ID {0!r}".format(short))
                return None
            try:
                sha_commit = matches.pop()
            except KeyError:
                # Bugfix: set.pop() raises KeyError (not IndexError) when
                # empty; the previous IndexError handler could never fire.
                pass
    except TypeError as exc:
        log.warning("Invalid environment {0}: {1}".format(short, exc))
    except KeyError:
        # No matching SHA
        return None

    try:
        return repo.get_object(sha_commit.tree)
    except NameError:
        # No matching sha_commit object was created. Unable to find SHA.
        pass
    return None
def find_file(path, tgt_env="base", **kwargs):
    """
    Find the first file to match the path and ref, read the file out of git
    and send the path to the newly cached file
    """
    fnd = {"path": "", "rel": ""}
    # Absolute paths are never served from gitfs
    if os.path.isabs(path):
        return fnd
    provider = _get_provider()
    base_branch = __opts__["gitfs_base"]
    gitfs_root = __opts__["gitfs_root"]
    gitfs_mountpoint = salt.utils.strip_proto(__opts__["gitfs_mountpoint"])
    if tgt_env == "base":
        tgt_env = base_branch
    # Cache-file locations: the blob itself, its hash files, and a lock file
    dest = os.path.join(__opts__["cachedir"], "gitfs/refs", tgt_env, path)
    hashes_glob = os.path.join(__opts__["cachedir"],
                               "gitfs/hash",
                               tgt_env,
                               "{0}.hash.*".format(path))
    blobshadest = os.path.join(__opts__["cachedir"],
                               "gitfs/hash",
                               tgt_env,
                               "{0}.hash.blob_sha1".format(path))
    lk_fn = os.path.join(__opts__["cachedir"],
                         "gitfs/hash",
                         tgt_env,
                         "{0}.lk".format(path))
    destdir = os.path.dirname(dest)
    hashdir = os.path.dirname(blobshadest)
    if not os.path.isdir(destdir):
        try:
            os.makedirs(destdir)
        except OSError:
            # Path exists and is a file, remove it and retry
            os.remove(destdir)
            os.makedirs(destdir)
    if not os.path.isdir(hashdir):
        try:
            os.makedirs(hashdir)
        except OSError:
            # Path exists and is a file, remove it and retry
            os.remove(hashdir)
            os.makedirs(hashdir)
    # Check every configured remote until the path is found
    for repo_conf in init():
        repo = repo_conf["repo"]
        # Per-remote root/mountpoint override the global settings
        root = repo_conf["root"] if repo_conf["root"] is not None else gitfs_root
        mountpoint = repo_conf["mountpoint"] if repo_conf["mountpoint"] is not None else gitfs_mountpoint
        if mountpoint and not path.startswith(mountpoint + os.path.sep):
            continue
        # Translate the salt fileserver path into a path within the repo
        repo_path = path[len(mountpoint):].lstrip(os.path.sep)
        if root:
            repo_path = os.path.join(root, repo_path)
        # Resolve the blob and its sha with the configured git provider
        if provider == "gitpython":
            tree = _get_tree_gitpython(repo, tgt_env)
            if not tree:
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                blob = tree / repo_path
            except KeyError:
                continue
            blob_hexsha = blob.hexsha
        elif provider == "pygit2":
            tree = _get_tree_pygit2(repo, tgt_env)
            if not tree:
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                blob = repo[tree[repo_path].oid]
            except KeyError:
                continue
            blob_hexsha = blob.hex
        elif provider == "dulwich":
            prefix_dirs, _, filename = repo_path.rpartition(os.path.sep)
            tree = _get_tree_dulwich(repo, tgt_env)
            tree = _dulwich_walk_tree(repo, tree, prefix_dirs)
            if not isinstance(tree, dulwich.objects.Tree):
                # Branch/tag/SHA not found in repo, try the next
                continue
            try:
                # Referencing the path in the tree returns a tuple, the
                # second element of which is the object ID of the blob
                blob = repo.get_object(tree[filename][1])
            except KeyError:
                continue
            blob_hexsha = blob.sha().hexdigest()
        # Wait for any concurrent cache update to finish
        salt.fileserver.wait_lock(lk_fn, dest)
        # Cache hit: stored blob sha matches, serve the cached copy
        if os.path.isfile(blobshadest) and os.path.isfile(dest):
            with salt.utils.fopen(blobshadest, "r") as fp_:
                sha = fp_.read()
                if sha == blob_hexsha:
                    fnd["rel"] = path
                    fnd["path"] = dest
                    return fnd
        # Cache miss: take the lock, drop stale hashes, rewrite the cache
        with salt.utils.fopen(lk_fn, "w+") as fp_:
            fp_.write("")
        for filename in glob.glob(hashes_glob):
            try:
                os.remove(filename)
            except Exception:
                pass
        with salt.utils.fopen(dest, "w+") as fp_:
            if provider == "gitpython":
                blob.stream_data(fp_)
            elif provider == "pygit2":
                fp_.write(blob.data)
            elif provider == "dulwich":
                fp_.write(blob.as_raw_string())
        with salt.utils.fopen(blobshadest, "w+") as fp_:
            fp_.write(blob_hexsha)
        try:
            os.remove(lk_fn)
        except (OSError, IOError):
            pass
        fnd["rel"] = path
        fnd["path"] = dest
        return fnd
    # Not found in any remote
    return fnd
def _get_tree_dulwich(repo, short):
    '''
    Return a dulwich.objects.Tree object if the branch/tag/SHA is found,
    otherwise None
    '''
    refs = repo.get_refs()
    # Sorting ensures we check heads (branches) before tags
    for ref in sorted(_dulwich_env_refs(refs)):
        # ref will be something like 'refs/heads/master'. Split on the first
        # '/' only, otherwise refs with nested names (refs/heads/feature/x)
        # raise ValueError on unpacking.
        rtype, rspec = ref[5:].split('/', 1)
        if rspec == short:
            if rtype == 'heads':
                commit = repo.get_object(refs[ref])
            elif rtype == 'tags':
                tag = repo.get_object(refs[ref])
                if isinstance(tag, dulwich.objects.Tag):
                    # Tag.get_object() returns a 2-tuple, the 2nd element of
                    # which is the commit SHA to which the tag refers
                    commit = repo.get_object(tag.object[1])
                elif isinstance(tag, dulwich.objects.Commit):
                    commit = tag
                else:
                    log.error(
                        'Unhandled object type {0!r} in _get_tree_dulwich. '
                        'This is a bug, please report it.'
                        .format(tag.type_name)
                    )
            return repo.get_object(commit.tree)

    # Branch or tag not matched, check if 'short' is a commit. This is more
    # difficult with Dulwich because of its inability to deal with shortened
    # SHA-1 hashes.
    try:
        int(short, 16)
    except ValueError:
        # Not hexidecimal, likely just a non-matching environment
        return None

    try:
        if len(short) == 40:
            sha_obj = repo.get_object(short)
            if isinstance(sha_obj, dulwich.objects.Commit):
                sha_commit = sha_obj
        else:
            # Short SHA: scan the object store for commits with this prefix
            matches = [
                x for x in (
                    repo.get_object(x) for x in repo.object_store
                    if x.startswith(short)
                )
                if isinstance(x, dulwich.objects.Commit)
            ]
            if len(matches) > 1:
                log.warning('Ambiguous commit ID {0!r}'.format(short))
                return None
            try:
                sha_commit = matches[0]
            except IndexError:
                pass
    except TypeError as exc:
        log.warning('Invalid environment {0}: {1}'.format(short, exc))
    except KeyError:
        # No matching SHA
        return None

    try:
        return repo.get_object(sha_commit.tree)
    except NameError:
        # No matching sha_commit object was created. Unable to find SHA.
        pass
    return None