def is_stage_empty():
    """
    Compares the entries from the last commit object with the ones in the
    index file.

    Returns:
        bool: True when every index entry matches the committed tree
        (path present and blob sha1 equal), False as soon as one entry
        differs or has not been committed yet.
    """
    from .gitObject import read_object
    from .gitTree import get_subtree_entries

    local_sha1 = get_active_branch_hash()
    obj_type, data = read_object(local_sha1)
    assert obj_type == 'commit'
    splitted_commit = data.decode().splitlines()
    # We want to get the tree hash so that we can read the top tree object
    for line in splitted_commit:
        if line.startswith('tree '):
            tree_sha1 = line[5:]
            break
    committed_entries = []
    get_subtree_entries(tree_sha1, '', committed_entries)
    # Build a path -> sha1 map once, instead of scanning the committed
    # entry list for every index entry (was O(n*m)).
    committed = {path: sha for path, sha in committed_entries}
    for index_entry in read_index():
        committed_sha = committed.get(index_entry.path)
        if committed_sha is None:
            # the entry has not been committed yet, so staging differs
            return False
        if committed_sha != index_entry.sha1.hex():
            # staged content differs from the committed blob
            return False
    # NOTE(review): staged deletions (path in commit but removed from the
    # index) are not detected here and the stage is reported empty —
    # TODO confirm this is intended.
    return True
def push():
    """Push current active branch to given git repo URL.

    Reads the current branch head, compares it with the remote head stored
    behind the branch CID, uploads missing commits to storage and finally
    writes the new branch CID into the repository contract. Prints status
    messages; returns None in all cases.
    """
    if not check_if_repo_created():
        print(
            'Repository has not been registered yet. Use\n\n`git3 create`\n\nbefore you push'
        )
        return
    active_branch_name = get_current_branch_name()
    local_sha1 = get_active_branch_hash()
    remote_database_cid = get_remote_branch_hash(active_branch_name)
    client = getStorageClient()
    # if remote_database_cid is None, nothing has been pushed yet.
    if remote_database_cid is not None:
        # since there is already something pushed, we will have to get the remote cid
        remote_database = client.get_json(remote_database_cid)
        remote_commit_cid = remote_database['head_cid']
        remote_commit = client.get_json(remote_commit_cid)
        remote_sha1 = remote_commit['sha1']
    else:
        remote_commit_cid = None
        remote_sha1 = None
        # is going to contain all data in order to make loading the directory structure faster!
        remote_database = {'files': {}}
        remote_database['path'] = ['files']
    if local_sha1 == remote_sha1:
        print('Everything up-to-date')
        return
    elif check_if_remote_ahead(remote_sha1):
        print('Remote repository is ahead. Fetch and merge the changes first')
        return
    print('Pushing files to IPFS')
    # push_commit uploads all missing commits and mutates remote_database
    # (adds committer/currentCommitMessage entries) as a side effect.
    branch_cid = push_commit(local_sha1, remote_sha1, remote_commit_cid,
                             remote_database)
    remote_database['head_cid'] = branch_cid
    if branch_cid == remote_database_cid:
        print('Everything up-to-date')
    else:
        # strip the working keys before persisting the branch database
        del remote_database['path']
        del remote_database['committer']
        del remote_database['currentCommitMessage']
        branch_cid = push_data_to_storage(remote_database)
        print('Going to write the CID into repository contract')
        push_new_cid(active_branch_name, branch_cid)
def get_status_commit():
    """
    Get status of HEAD commit, return tuple of
    (changed_paths, new_paths, deleted_paths).
    """
    head_sha1 = get_active_branch_hash()
    committed = read_commit_entries(head_sha1)
    staged = {entry.path: entry for entry in read_index()}

    committed_paths = set(committed)
    staged_paths = set(staged)

    # paths present in both but whose blob hashes disagree
    changed = set()
    for path in committed_paths & staged_paths:
        if committed[path] != staged[path].sha1.hex():
            changed.add(path)

    new = staged_paths - committed_paths
    deleted = committed_paths - staged_paths
    return (sorted(changed), sorted(new), sorted(deleted))
def merge(source_branch):
    """
    Merges two branches.

    If the source_branch parameter is set, the source branch is merged into
    the current branch. If the parameter is not set, a merge from FETCH_HEAD
    is performed. Performs either a fast-forward (local commits are a subset
    of remote ones) or a 3-way merge; on conflicts the MERGE_* files are
    written and no merge commit is created.
    """
    had_conflict = False
    repo_root_path = get_repo_root_path()
    # if no source branch for merge is given, we go through the FETCH_HEAD file
    if source_branch is None:
        fetch_head_path = os.path.join(repo_root_path, '.git/FETCH_HEAD')
        if not os.path.exists(fetch_head_path):
            print('Nothing to merge. Have you called fetch before?')
            return
        fetch_head_content = read_file(fetch_head_path).decode('utf-8')
        findings = re.findall(r'^([ABCDEFabcdef0-9]+)\s+branch (\w|\')+',
                              fetch_head_content)
        if len(findings) == 0:
            remote_sha1 = None
        else:
            remote_sha1 = findings[0][0]
    else:
        # otherwise we are looking for the refs file first.
        # git doesn't use the FETCH_HEAD file when a branch name is given!
        source_branch_head_path = os.path.join(repo_root_path,
                                               '.git/refs/heads/',
                                               source_branch)
        if not os.path.exists(source_branch_head_path):
            # if the refs file does not exist, check whether packed-refs exists
            packed_refs_path = os.path.join(repo_root_path, '.git/packed-refs')
            if not os.path.exists(packed_refs_path):
                remote_sha1 = None
            else:
                # read the packed-refs file and extract the commit hash
                packed_refs_content = read_file(packed_refs_path).decode('utf-8')
                findings = re.findall(
                    r'([ABCDEFabcdef0-9]*) refs\/remotes\/origin\/{}'.format(
                        source_branch), packed_refs_content)
                if len(findings) == 0:
                    remote_sha1 = None
                else:
                    remote_sha1 = findings[0]
        else:
            # if the file exists, we read the sha1 from it
            remote_sha1 = read_file(source_branch_head_path).decode('utf-8')
    if remote_sha1 is None:
        print('merge: {} - not something we can merge'.format(source_branch))
        exit(1)
    activeBranch = get_current_branch_name()
    local_sha1 = get_active_branch_hash()
    remote_sha1 = remote_sha1.strip()
    local_sha1 = local_sha1.strip()
    if remote_sha1 == local_sha1:
        # nothing to merge, both heads are equal
        return
    remote_commits = get_all_local_commits(remote_sha1)
    local_commits = get_all_local_commits(local_sha1)
    difference = set(local_commits) - set(remote_commits)
    if len(difference) == 0:
        # fast forward strategy: just move the branch ref and unpack the tree
        path = os.path.join(repo_root_path,
                            '.git/refs/heads/{}'.format(activeBranch))
        write_file(path, "{}\n".format(remote_sha1).encode())
        obj_type, commit_data = read_object(remote_sha1.strip())
        tree_sha1 = commit_data.decode().splitlines()[0][5:45]
        unpack_object(tree_sha1, repo_root_path, repo_root_path)
        return
    # non fast forward strategy: find the common ancestor and run a 3-way
    # merge on the files, then create a commit object with 2 parents
    intersection = set(local_commits).intersection(remote_commits)
    for commit_hash in remote_commits:
        if commit_hash in intersection:
            ancestor = commit_hash
            break
    obj_type, ancestor_commit = read_object(ancestor)
    obj_type, a_commit = read_object(local_commits[0])
    obj_type, b_commit = read_object(remote_commits[0])
    # entry lists for the 3 revisions, format: [(filename, sha1), ...]
    ancestor_entries = []
    a_entries = []
    b_entries = []
    get_subtree_entries(ancestor_commit.splitlines()[0][5:45].decode(), '',
                        ancestor_entries)
    get_subtree_entries(a_commit.splitlines()[0][5:45].decode(), '', a_entries)
    get_subtree_entries(b_commit.splitlines()[0][5:45].decode(), '', b_entries)
    # filename -> [ancestor_sha, local_sha, remote_sha]; renamed from
    # `merge` which shadowed this function's own name
    merge_map = {}
    for e in ancestor_entries:
        if e[0] not in merge_map:
            merge_map[e[0]] = [e[1]]
    for e in a_entries:
        if e[0] not in merge_map:
            merge_map[e[0]] = [None, e[1]]
        else:
            merge_map[e[0]].append(e[1])
    for e in b_entries:
        if e[0] not in merge_map:
            merge_map[e[0]] = [None, None, e[1]]
        else:
            merge_map[e[0]].append(e[1])
    # if all hashes are the same, there is nothing we have to do
    conflict_files = []
    for fname in merge_map:
        hashes = merge_map[fname]
        if len(hashes) == 2 and hashes[0] != hashes[1]:
            # only two entries: the remote branch does not have the file,
            # write the local blob into the working tree
            obj_type, data = read_object(hashes[1])
            path = os.path.join(repo_root_path, fname)
            if not os.path.exists(path):
                os.makedirs(os.path.dirname(path), exist_ok=True)
            write_file(path, data)
        elif hashes[0] is None and hashes[1] is None:
            # three entries whose first two are None: the local repository
            # does not have the file, so we add the remote blob
            obj_type, data = read_object(hashes[2])
            path = os.path.join(repo_root_path, fname)
            if not os.path.exists(path):
                os.makedirs(os.path.dirname(path), exist_ok=True)
            write_file(path, data)
        elif len(set(hashes)) == 3:
            # all entries are different, so 3-way merge
            obj_type, base_data = read_object(hashes[0])
            obj_type, local_data = read_object(hashes[1])
            obj_type, remote_data = read_object(hashes[2])
            file_conflict, merged_lines = three_way_merge(
                base_data.decode().splitlines(),
                local_data.decode().splitlines(),
                remote_data.decode().splitlines(), "HEAD", hashes[2])
            # write the merged (possibly conflict-marked) lines back
            with open(os.path.join(repo_root_path, fname), 'w') as merged_file:
                for line in merged_lines:
                    merged_file.write('{}\n'.format(line))
            if file_conflict:
                # BUGFIX: was `had_conflict = <result of last merge>` —
                # a later clean file reset the flag and a merge commit was
                # created despite earlier conflicts. Now it stays sticky.
                had_conflict = True
                # track the file so that we don't add it to the index
                conflict_files.append(fname)
                path = os.path.join(repo_root_path, '.git/ORIG_HEAD')
                write_file(path, '{}\n'.format(local_sha1).encode())
                path = os.path.join(repo_root_path, '.git/MERGE_HEAD')
                # BUGFIX: was `fetch_head[:40].decode()` — `fetch_head` was
                # never defined (NameError). remote_sha1 is the merged head.
                write_file(path, '{}\n'.format(remote_sha1).encode())
                path = os.path.join(repo_root_path, '.git/MERGE_MODE')
                write_file(path, b'')
                path = os.path.join(repo_root_path, '.git/MERGE_MSG')
                if os.path.exists(path):
                    # append conflicted file name; BUGFIX: the file handle
                    # previously shadowed the loop variable and the file
                    # object's repr was written instead of the filename
                    with open(path, 'a') as msg_file:
                        msg_file.write('# \t{}'.format(fname))
                else:
                    repo_name = read_repo_name()
                    if not repo_name.startswith('location:'):
                        # Need to check if the return is handled by the calling function
                        print('.git/name file has an error. Exiting...')
                        return False
                    tmp = repo_name.split('location:')[1].split(':')
                    network = tmp[0].strip()
                    user_key = tmp[1].strip()
                    git_factory = get_factory_contract(network)
                    repository = git_factory.functions.getRepository(
                        user_key).call()
                    write_file(
                        path,
                        'Merge branch \'{}\' of {} into {}\n\n# Conflicts\n# \t{}\n'
                        .format(source_branch, repository[2], activeBranch,
                                fname).encode())
    # adding all the files to the index.
    # TODO: can be more efficient if we add it to the previous loop
    files_to_add = []
    pwd = os.getcwd()
    os.chdir(repo_root_path)
    for path, subdirs, files in os.walk('.'):
        for name in files:
            # we don't want to add the files under .git to the index
            rel_path = os.path.join(path, name)[2:]
            # BUGFIX: compared the bare basename against repo-relative
            # conflict paths, so conflicted files in subdirectories were
            # still staged; compare the relative path instead.
            if not path.startswith('./.git') and rel_path not in conflict_files:
                files_to_add.append(rel_path)
    os.chdir(pwd)
    add(files_to_add)
    # creating a commit object with two parents, unless a conflict is pending
    if not had_conflict:
        commit('Merging {} into {}'.format(source_branch, activeBranch),
               parent1=local_commits[0],
               parent2=remote_commits[0])
def diff(staged):
    """Show diff of files changed (between index and working copy).

    Parameters:
        staged (bool): if True, diff the index against HEAD;
            otherwise diff the working tree against the index.
    """
    if staged:
        # checks if there is a diff between index and HEAD.
        # we don't use deleted for now, since we don't have git3 rm command
        changed, new, deleted = get_status_commit()
        entries_by_path = {e.path: e for e in read_index()}
        local_sha1 = get_active_branch_hash()
        commit_entries = read_commit_entries(local_sha1)
        changed.extend(new)
        for i, path in enumerate(changed):
            sha1 = entries_by_path[path].sha1.hex()
            obj_type, data = read_object(sha1)
            assert obj_type == 'blob'
            # content from file which is stored in .git/objects/
            index_lines = data.decode().splitlines()
            if path in commit_entries:
                commit_path = path
                sha1 = commit_entries[path]
                obj_type, data = read_object(sha1)
                assert obj_type == 'blob'
                # content from file which is stored in .git/objects/
                commit_lines = data.decode().splitlines()
            else:
                # path is in the index but has not been committed yet
                commit_path = '/dev/null'
                # BUGFIX: was '' (a str); keep the type a list of lines as
                # in every other call to unified_diff in this function
                commit_lines = []
            diff_lines = difflib.unified_diff(
                commit_lines, index_lines,
                '{} (commit)'.format(commit_path),
                '{} (index)'.format(path),
                lineterm='')
            for line in diff_lines:
                print(line)
            if i < len(changed) - 1:
                print('-' * 70)
    else:
        # Show difference between working tree and index file
        changed, _, deleted = get_status_workspace()
        # gets all entries from the index file and puts those into a dict
        # the path is the key and IndexEntry is the value
        entries_by_path = {e.path: e for e in read_index()}
        changed.extend(deleted)
        for i, path in enumerate(changed):
            sha1 = entries_by_path[path].sha1.hex()
            obj_type, data = read_object(sha1)
            assert obj_type == 'blob'
            # content from file which is stored in .git/objects/
            index_lines = data.decode().splitlines()
            try:
                # content from file which is stored in the working directory
                working_lines = read_file(path).decode().splitlines()
                work_tree_path = path
            except FileNotFoundError:
                # the file has been deleted from the working tree
                working_lines = []
                work_tree_path = '/dev/null'
            diff_lines = difflib.unified_diff(
                index_lines, working_lines,
                '{} (index)'.format(path),
                '{} (working copy)'.format(work_tree_path),
                lineterm='')
            for line in diff_lines:
                print(line)
            if i < len(changed) - 1:
                print('-' * 70)
def pull():
    """Pull the remote head of the active branch into the local repository.

    Refuses to run when there are local workspace changes or staged entries.
    Resolves the branch CID through the repository contract, downloads the
    remote commit chain, fast-forwards the local ref and rewrites the
    working tree and index from the newest remote commit.

    Returns:
        None on the normal paths; False when .git/name is malformed.
    """
    print('Pulling')
    changed, _, _ = get_status_workspace()
    # we are checking if there are changed files in the working copy or files
    # staged which have not been committed.
    # if one case is true, pull won't be executed
    if len(changed) > 0 or not is_stage_empty():
        print("You have local changes. Add and commit those first")
        return
    repo_name = read_repo_name()
    if not repo_name.startswith('location:'):
        print('.git/name file has an error. Exiting...')
        return False
    # .git/name format: "location:<network>:<user_key>"
    tmp = repo_name.split('location:')[1].split(':')
    network = tmp[0].strip()
    user_key = tmp[1].strip()
    git_factory = get_factory_contract(network)
    repository = git_factory.functions.getRepository(user_key).call()
    if not repository[0]:
        # repository[0] is the "exists" flag of the contract tuple
        print('No such repository')
        return
    git_repo_address = repository[2]
    activeBranch = get_current_branch_name()
    branch_contract = get_facet_contract("GitBranch", git_repo_address,
                                         network)
    branch = branch_contract.functions.getBranch(activeBranch).call()
    # branch[1] is the CID of the branch head stored in the contract
    headCid = branch[1]
    remote_commits = get_all_remote_commits(headCid)
    # extract only the sha1 hash
    remote_commits_sha1 = [e['sha1'] for e in remote_commits]
    root_path = get_repo_root_path()
    local_commit = get_active_branch_hash()
    local_commits = get_all_local_commits(local_commit)
    if local_commits[0] == remote_commits_sha1[0]:
        print('Already up to date')
        return
    remote_to_local_difference = set(remote_commits_sha1) - set(local_commits)
    local_to_remote_difference = set(local_commits) - set(remote_commits_sha1)
    if len(remote_to_local_difference
           ) == 0 and len(local_to_remote_difference) > 0:
        print('You are ahead of remote branch')
        return
    elif len(remote_to_local_difference) == 0 and len(
            local_to_remote_difference) == 0:
        print('Nothing to pull')
        return
    elif len(local_to_remote_difference) == 0:
        # remote is strictly ahead: fast-forward by downloading and
        # unpacking only the commits we don't have yet.
        # NOTE(review): when both differences are non-empty (diverged
        # histories) the function falls through and silently does nothing —
        # TODO confirm whether a merge/abort message is intended here.
        # alright, we filtered what needs to be downloaded and unpacked
        # check clone on how to do that!
        remote_commits = list(
            filter(lambda x: x['sha1'] in remote_to_local_difference,
                   remote_commits))
        repo_name = root_path.split('/')[-1]
        # unpack files from the newest commit first
        first = True
        for commit in remote_commits:
            unpack_files_of_commit(root_path, commit, first)
            first = False
        # move the local branch ref to the new remote head
        refs_path = os.path.join(root_path, '.git', 'refs', 'heads',
                                 activeBranch)
        write_file(refs_path, (remote_commits[0]['sha1'] + '\n').encode())
        # we are deleting all the files in the repo
        # there might be a better way, where we iterate over all of the files,
        # hash and compare the hashes. If there is no difference, leave as is,
        # otherwise overwrite. We would also need to check for files which are
        # not in the index! Maybe something at a later point in time :)
        # Same at checkout
        commit_entries = read_commit_entries(remote_commits[0]['sha1'])
        remove_files_from_repo()
        files_to_add = []
        for filename in commit_entries:
            # materialize every blob of the new head into the working tree
            object_type, data = read_object(commit_entries[filename])
            assert object_type == 'blob'
            write_file('{}/{}'.format(root_path, filename),
                       data.decode('utf-8'),
                       binary='')
            files_to_add.append(filename)
        # remove index file and rebuild it from the freshly written files
        os.remove('{}/.git/index'.format(root_path))
        add(files_to_add)
def commit(message: str,
           author: str = None,
           parent1: str = None,
           parent2: str = None) -> str:
    """
    Commit the current state of the index to active branch with given message.
    Returns the hash of the commit object.

    Parameters:
        message (str): The message for the commit.
        author (str): The author of the commit; read from the config file
            when omitted.
        parent1 (str): The first parent of the commit; defaults to the
            current branch head.
        parent2 (str): The second parent of the commit; overridden by
            .git/MERGE_HEAD when a merge is in progress.

    Returns:
        str: hash of the created commit object.
    """
    try:
        index = read_index()
        # hash the tree built from the current index
        tree = hash_object(b''.join(write_tree(index)), 'tree')
    except NoRepositoryError as nre:
        print(nre)
        exit(1)
    if parent1 is None:
        # even though get_active_branch_hash throws a NoRepositoryError,
        # we don't have to catch it: without a repository we never get here
        parent = get_active_branch_hash()
    else:
        parent = parent1
    # if a MERGE_HEAD file exists, parent2 is set to the sha1 hash it holds
    merge_head_path = os.path.join(get_repo_root_path(), '.git', 'MERGE_HEAD')
    if os.path.exists(merge_head_path):
        parent2 = read_file(merge_head_path).decode().strip()
    if author is None:
        # get_value_from_config_file throws a NoRepositoryError too,
        # but as above we don't have to catch it here
        user_name = get_value_from_config_file('name')
        user_email = get_value_from_config_file('email')
        author = '{} <{}>'.format(user_name, user_email)
    timestamp = int(time.mktime(time.localtime()))
    utc_offset = -time.timezone
    # git timezone format is [+-]HHMM; BUGFIX: the sign test was
    # `utc_offset > 0`, which rendered UTC (offset 0) as '-0000'
    # instead of git's '+0000'
    author_time = '{} {}{:02}{:02}'.format(timestamp,
                                           '+' if utc_offset >= 0 else '-',
                                           abs(utc_offset) // 3600,
                                           (abs(utc_offset) // 60) % 60)
    lines = ['tree ' + tree]
    if parent:
        lines.append('parent ' + parent)
    if parent2 is not None:
        lines.append('parent ' + parent2)
    lines.append('author {} {}'.format(author, author_time))
    lines.append('committer {} {}'.format(author, author_time))
    lines.append('')
    lines.append(message)
    lines.append('')
    data = '\n'.join(lines).encode()
    sha1 = hash_object(data, 'commit')
    repo_root_path = get_repo_root_path()
    activeBranch = get_current_branch_name()
    branch_path = os.path.join(repo_root_path, '.git', 'refs', 'heads',
                               activeBranch)
    write_file(branch_path, (sha1 + '\n').encode())
    # remove the merge files from the .git directory if committed
    if parent2 is not None and os.path.exists(merge_head_path):
        os.remove(merge_head_path)
        os.remove(merge_head_path.replace('MERGE_HEAD', 'MERGE_MODE'))
        os.remove(merge_head_path.replace('MERGE_HEAD', 'MERGE_MSG'))
    # TODO: git returns the number of files added and changed. Would be good too
    print('[{} {}] {}'.format(activeBranch, sha1[:7], message))
    print('Author: {}'.format(author))
    return sha1