def test_read_objects(self):
    """Stream a two-object pack (a blob plus an offset delta against it)
    and verify every field PackStreamReader reports for each object."""
    buf = BytesIO()
    entries = build_pack(buf, [
        (Blob.type_num, 'blob'),
        (OFS_DELTA, (0, 'blob1')),
    ])
    unpacked = list(PackStreamReader(buf.read).read_objects(compute_crc32=True))
    self.assertEqual(2, len(unpacked))
    blob_obj, delta_obj = unpacked

    # Full (non-delta) blob: offset, type, contents and CRC32 match the spec.
    self.assertEqual(entries[0][0], blob_obj.offset)
    self.assertEqual(Blob.type_num, blob_obj.pack_type_num)
    self.assertEqual(Blob.type_num, blob_obj.obj_type_num)
    self.assertEqual(None, blob_obj.delta_base)
    self.assertEqual('blob', ''.join(blob_obj.decomp_chunks))
    self.assertEqual(entries[0][4], blob_obj.crc32)

    # Offset delta: the base is recorded as a backwards distance, and the
    # resolved object type is still unknown (None) at streaming time.
    self.assertEqual(entries[1][0], delta_obj.offset)
    self.assertEqual(OFS_DELTA, delta_obj.pack_type_num)
    self.assertEqual(None, delta_obj.obj_type_num)
    self.assertEqual(delta_obj.offset - blob_obj.offset, delta_obj.delta_base)
    self.assertEqual(create_delta('blob', 'blob1'),
                     ''.join(delta_obj.decomp_chunks))
    self.assertEqual(entries[1][4], delta_obj.crc32)
def test_simple_delta(self):
    """deltify_pack_objects emits the first blob whole and the second blob
    as a delta whose base is the first blob's SHA."""
    base_blob = Blob.from_string("a" * 101)
    target_blob = Blob.from_string("a" * 100)
    expected = [
        (base_blob.type_num, base_blob.sha().digest(), None,
         base_blob.as_raw_string()),
        (target_blob.type_num, target_blob.sha().digest(),
         base_blob.sha().digest(),
         create_delta(base_blob.as_raw_string(), target_blob.as_raw_string())),
    ]
    actual = list(deltify_pack_objects([(base_blob, ""), (target_blob, "")]))
    self.assertEqual(expected, actual)
def test_simple_delta(self):
    """Check deltification of two similar blobs: the first is kept whole,
    the second becomes a delta against the first."""
    long_blob = Blob.from_string("a" * 101)
    short_blob = Blob.from_string("a" * 100)
    delta = create_delta(long_blob.as_raw_string(), short_blob.as_raw_string())
    result = list(deltify_pack_objects([(long_blob, ""), (short_blob, "")]))
    self.assertEqual(
        [(long_blob.type_num, long_blob.sha().digest(), None,
          long_blob.as_raw_string()),
         (short_blob.type_num, short_blob.sha().digest(),
          long_blob.sha().digest(), delta)],
        result)
def _test_roundtrip(self, base, target):
    """Assert that applying create_delta(base, target) to *base*
    reconstructs *target* exactly.

    apply_delta returns the result as a list of chunks, so they are
    joined before comparison.
    """
    # assertEqual instead of the deprecated assertEquals alias (removed in
    # newer unittest versions); matches the sibling helper in this file.
    self.assertEqual(
        target, "".join(apply_delta(base, create_delta(base, target))))
def build_pack(f, objects_spec, store=None):
    """Write test pack data from a concise spec.

    Args:
      f: A file-like object to write the pack to.
      objects_spec: A list of (type_num, obj). For non-delta types, obj
        is the string of that object's data. For delta types, obj is a
        tuple of (base, data), where:

        * base can be either an index in objects_spec of the base for that
        * delta; or for a ref delta, a SHA, in which case the resulting pack
        * will be thin and the base will be an external ref.
        * data is a string of the full, non-deltified data for that object.

        Note that offsets/refs and deltas are computed within this function.
      store: An optional ObjectStore for looking up external refs.
    Returns: A list of tuples in the order specified by objects_spec:
      (offset, type num, data, sha, CRC32)
    """
    sf = SHA1Writer(f)
    num_objects = len(objects_spec)
    write_pack_header(sf, num_objects)

    # Pass 1: resolve every spec entry to its full (type, data, sha).
    # A delta's base may appear later in the spec, so keep sweeping until
    # everything is resolved.  NOTE(review): an unsatisfiable base index
    # would make this loop spin forever — acceptable for test data.
    full_objects = {}
    offsets = {}
    crc32s = {}
    while len(full_objects) < num_objects:
        for i, (type_num, data) in enumerate(objects_spec):
            if type_num not in DELTA_TYPES:
                full_objects[i] = (type_num, data, obj_sha(type_num, [data]))
                continue
            base, data = data
            if isinstance(base, int):
                if base not in full_objects:
                    # Base not resolved yet; retry on a later sweep.
                    continue
                base_type_num, _, _ = full_objects[base]
            else:
                # External (thin-pack) base: get its type from the store.
                base_type_num, _ = store.get_raw(base)
            full_objects[i] = (
                base_type_num,
                data,
                obj_sha(base_type_num, [data]),
            )

    # Pass 2: write objects in spec order, computing each delta payload
    # and recording offsets and CRC32s as we go.
    for i, (type_num, obj) in enumerate(objects_spec):
        offset = f.tell()
        if type_num == OFS_DELTA:
            base_index, data = obj
            # Offset deltas encode the backwards distance to the base.
            base = offset - offsets[base_index]
            _, base_data, _ = full_objects[base_index]
            obj = (base, create_delta(base_data, data))
        elif type_num == REF_DELTA:
            base_ref, data = obj
            if isinstance(base_ref, int):
                _, base_data, base = full_objects[base_ref]
            else:
                base_type_num, base_data = store.get_raw(base_ref)
                base = obj_sha(base_type_num, base_data)
            obj = (base, create_delta(base_data, data))
        crc32 = write_pack_object(sf, type_num, obj)
        offsets[i] = offset
        crc32s[i] = crc32

    # Assemble the expected-results list in spec order.
    expected = []
    for i in range(num_objects):
        type_num, data, sha = full_objects[i]
        assert len(sha) == 20
        expected.append((offsets[i], type_num, data, sha, crc32s[i]))
    sf.write_sha()
    # Rewind so callers can immediately read the pack back.
    f.seek(0)
    return expected
def import_git_commit(self, commit):
    """Convert a git commit into a Mercurial changeset and record the
    git SHA -> hg changeset mapping.

    Reconstructs the commit message/author (including hg metadata that
    was round-tripped through git), builds a memctx from the changed
    files, handles octopus merges by chaining two-parent commits, and
    aborts if a parent changeset is missing from the local repo.
    """
    self.ui.debug(_("importing: %s\n") % commit.id)
    (strip_message, hg_renames,
     hg_branch, extra) = self.extract_hg_metadata(commit.message)

    # get a list of the changed, added, removed files
    files = self.get_files_changed(commit)

    date = (commit.author_time, -commit.author_timezone)
    text = strip_message

    # Normalize the message; if normalization changed it, store a delta
    # so the exact original git message can be recovered on export.
    origtext = text
    try:
        text.decode('utf-8')
    except UnicodeDecodeError:
        text = self.decode_guess(text, commit.encoding)

    text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
    if text + '\n' != origtext:
        extra['message'] = create_delta(text +'\n', origtext)

    author = commit.author

    # convert extra data back to the end
    if ' ext:' in commit.author:
        regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
        m = regex.match(commit.author)
        if m:
            name = m.group(1)
            ex = urllib.unquote(m.group(2))
            email = m.group(3)
            author = name + ' <' + email + '>' + ex

    # '<none@none>' is the placeholder used when the hg author had no
    # email; strip the 12-character suffix to restore the bare name.
    if ' <none@none>' in commit.author:
        author = commit.author[:-12]

    try:
        author.decode('utf-8')
    except UnicodeDecodeError:
        origauthor = author
        author = self.decode_guess(author, commit.encoding)
        extra['author'] = create_delta(author, origauthor)

    oldenc = self.swap_out_encoding()

    def getfilectx(repo, memctx, f):
        # Build a memfilectx for a file reported as changed by git.
        delete, mode, sha = files[f]
        if delete:
            # IOError tells memctx the file was removed in this commit.
            raise IOError

        data = self.git[sha].data
        copied_path = hg_renames.get(f)
        e = self.convert_git_int_mode(mode)

        return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

    gparents = map(self.map_hg_get, commit.parents)
    p1, p2 = (nullid, nullid)
    octopus = False

    if len(gparents) > 1:
        # merge, possibly octopus
        def commit_octopus(p1, p2):
            # Collapse one pair of parents into an intermediate changeset,
            # tagged so export can reassemble the octopus merge.
            ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                                 getfilectx, author, date,
                                 {'hg-git': 'octopus'})
            return hex(self.repo.commitctx(ctx))

        octopus = len(gparents) > 2
        p2 = gparents.pop()
        p1 = gparents.pop()
        while len(gparents) > 0:
            p2 = commit_octopus(p1, p2)
            p1 = gparents.pop()
    else:
        if gparents:
            p1 = gparents.pop()

    pa = None
    # NOTE(review): `pa` (common ancestor) is computed but never used below.
    if not (p2 == nullid):
        node1 = self.repo.changectx(p1)
        node2 = self.repo.changectx(p2)
        pa = node1.ancestor(node2)

    # if named branch, add to extra
    if hg_branch:
        extra['branch'] = hg_branch

    # if committer is different than author, add it to extra
    if commit.author != commit.committer \
            or commit.author_time != commit.commit_time \
            or commit.author_timezone != commit.commit_timezone:
        extra['committer'] = "%s %d %d" % (
            commit.committer, commit.commit_time, -commit.commit_timezone)

    if commit.encoding:
        extra['encoding'] = commit.encoding

    # NOTE(review): duplicate of the hg_branch assignment above; the
    # second write is redundant.
    if hg_branch:
        extra['branch'] = hg_branch

    if octopus:
        extra['hg-git'] ='octopus-done'

    # TODO use 'n in self.repo' when we require hg 1.5
    def repo_contains(n):
        try:
            return bool(self.repo.lookup(n))
        except error.RepoLookupError:
            return False

    if not (repo_contains(p1) and repo_contains(p2)):
        raise hgutil.Abort(_('you appear to have run strip - '
                             'please run hg git-cleanup'))

    ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                         getfilectx, author, date, extra)

    node = self.repo.commitctx(ctx)

    self.swap_out_encoding(oldenc)

    # save changeset to mapping file
    cs = hex(node)
    self.map_set(commit.id, cs)
def build_pack(f, objects_spec, store=None):
    """Write test pack data from a concise spec.

    :param f: A file-like object to write the pack to.
    :param objects_spec: A list of (type_num, obj). For non-delta types, obj
        is the string of that object's data. For delta types, obj is a
        tuple of (base, data), where:

        * base can be either an index in objects_spec of the base for that
        * delta; or for a ref delta, a SHA, in which case the resulting pack
        * will be thin and the base will be an external ref.
        * data is a string of the full, non-deltified data for that object.

        Note that offsets/refs and deltas are computed within this function.
    :param store: An optional ObjectStore for looking up external refs.
    :return: A list of tuples in the order specified by objects_spec:
        (offset, type num, data, sha, CRC32)
    """
    sf = SHA1Writer(f)
    num_objects = len(objects_spec)
    write_pack_header(sf, num_objects)

    # Pass 1: resolve every spec entry to its full (type, data, sha).
    # A delta's base may appear later in the spec, so keep sweeping until
    # everything is resolved.  NOTE(review): an unsatisfiable base index
    # would make this loop spin forever — acceptable for test data.
    full_objects = {}
    offsets = {}
    crc32s = {}
    while len(full_objects) < num_objects:
        for i, (type_num, data) in enumerate(objects_spec):
            if type_num not in DELTA_TYPES:
                full_objects[i] = (type_num, data, obj_sha(type_num, [data]))
                continue
            base, data = data
            if isinstance(base, int):
                if base not in full_objects:
                    # Base not resolved yet; retry on a later sweep.
                    continue
                base_type_num, _, _ = full_objects[base]
            else:
                # External (thin-pack) base: get its type from the store.
                base_type_num, _ = store.get_raw(base)
            full_objects[i] = (base_type_num, data,
                               obj_sha(base_type_num, [data]))

    # Pass 2: write objects in spec order, computing each delta payload
    # and recording offsets and CRC32s as we go.
    for i, (type_num, obj) in enumerate(objects_spec):
        offset = f.tell()
        if type_num == OFS_DELTA:
            base_index, data = obj
            # Offset deltas encode the backwards distance to the base.
            base = offset - offsets[base_index]
            _, base_data, _ = full_objects[base_index]
            obj = (base, create_delta(base_data, data))
        elif type_num == REF_DELTA:
            base_ref, data = obj
            if isinstance(base_ref, int):
                _, base_data, base = full_objects[base_ref]
            else:
                base_type_num, base_data = store.get_raw(base_ref)
                base = obj_sha(base_type_num, base_data)
            obj = (base, create_delta(base_data, data))
        crc32 = write_pack_object(sf, type_num, obj)
        offsets[i] = offset
        crc32s[i] = crc32

    # Assemble the expected-results list in spec order.
    expected = []
    for i in range(num_objects):
        type_num, data, sha = full_objects[i]
        expected.append((offsets[i], type_num, data, sha, crc32s[i]))
    sf.write_sha()
    # Rewind so callers can immediately read the pack back.
    f.seek(0)
    return expected
def _test_roundtrip(self, base, target):
    """Verify that a delta from *base* reconstructs *target* when applied."""
    delta = create_delta(base, target)
    reconstructed = ''.join(apply_delta(base, delta))
    self.assertEqual(target, reconstructed)
def import_git_commit(self, commit):
    """Convert a git commit into a Mercurial changeset and record the
    git SHA -> hg changeset mapping.

    Like the basic importer, but additionally joins up "converged" files
    (files identical in both merge parents yet at different hg filelog
    revisions) so their histories merge correctly.
    """
    self.ui.debug(_("importing: %s\n") % commit.id)
    (strip_message, hg_renames,
     hg_branch, extra) = self.extract_hg_metadata(commit.message)

    # get a list of the changed, added, removed files
    files = self.get_files_changed(commit)

    date = (commit.author_time, -commit.author_timezone)
    text = strip_message

    # Normalize the message; if normalization changed it, store a delta
    # so the exact original git message can be recovered on export.
    origtext = text
    try:
        text.decode('utf-8')
    except UnicodeDecodeError:
        text = self.decode_guess(text, commit.encoding)

    text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
    if text + '\n' != origtext:
        extra['message'] = create_delta(text +'\n', origtext)

    author = commit.author

    # convert extra data back to the end
    if ' ext:' in commit.author:
        regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
        m = regex.match(commit.author)
        if m:
            name = m.group(1)
            ex = urllib.unquote(m.group(2))
            email = m.group(3)
            author = name + ' <' + email + '>' + ex

    # '<none@none>' is the placeholder used when the hg author had no
    # email; strip the 12-character suffix to restore the bare name.
    if ' <none@none>' in commit.author:
        author = commit.author[:-12]

    try:
        author.decode('utf-8')
    except UnicodeDecodeError:
        origauthor = author
        author = self.decode_guess(author, commit.encoding)
        extra['author'] = create_delta(author, origauthor)

    oldenc = self.swap_out_encoding()

    def findconvergedfiles(p1, p2):
        # If any files have the same contents in both parents of a merge
        # (and are therefore not reported as changed by Git) but are at
        # different file revisions in Mercurial (because they arrived at
        # those contents in different ways), we need to include them in
        # the list of changed files so that Mercurial can join up their
        # filelog histories (same as if the merge was done in Mercurial to
        # begin with).
        if p2 == nullid:
            return []
        manifest1 = self.repo.changectx(p1).manifest()
        manifest2 = self.repo.changectx(p2).manifest()
        return [path for path, node1 in manifest1.iteritems()
                if path not in files and manifest2.get(path, node1) != node1]

    def getfilectx(repo, memctx, f):
        info = files.get(f)
        if info != None:
            # it's a file reported as modified from Git
            delete, mode, sha = info
            if delete:
                # IOError tells memctx the file was removed in this commit.
                raise IOError

            data = self.git[sha].data
            copied_path = hg_renames.get(f)
            e = self.convert_git_int_mode(mode)
        else:
            # it's a converged file
            fc = context.filectx(self.repo, f, changeid=memctx.p1().rev())
            data = fc.data()
            e = fc.flags()
            copied_path = fc.renamed()

        return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

    gparents = map(self.map_hg_get, commit.parents)
    p1, p2 = (nullid, nullid)
    octopus = False

    if len(gparents) > 1:
        # merge, possibly octopus
        def commit_octopus(p1, p2):
            # Collapse one pair of parents into an intermediate changeset,
            # tagged so export can reassemble the octopus merge.
            ctx = context.memctx(self.repo, (p1, p2), text,
                                 list(files) + findconvergedfiles(p1, p2),
                                 getfilectx, author, date,
                                 {'hg-git': 'octopus'})
            return hex(self.repo.commitctx(ctx))

        octopus = len(gparents) > 2
        p2 = gparents.pop()
        p1 = gparents.pop()
        while len(gparents) > 0:
            p2 = commit_octopus(p1, p2)
            p1 = gparents.pop()
    else:
        if gparents:
            p1 = gparents.pop()

    pa = None
    # NOTE(review): `pa` (common ancestor) is computed but never used below.
    if not (p2 == nullid):
        node1 = self.repo.changectx(p1)
        node2 = self.repo.changectx(p2)
        pa = node1.ancestor(node2)

    # if named branch, add to extra
    if hg_branch:
        extra['branch'] = hg_branch

    # if committer is different than author, add it to extra
    if commit.author != commit.committer \
            or commit.author_time != commit.commit_time \
            or commit.author_timezone != commit.commit_timezone:
        extra['committer'] = "%s %d %d" % (
            commit.committer, commit.commit_time, -commit.commit_timezone)

    if commit.encoding:
        extra['encoding'] = commit.encoding

    # NOTE(review): duplicate of the hg_branch assignment above; the
    # second write is redundant.
    if hg_branch:
        extra['branch'] = hg_branch

    if octopus:
        extra['hg-git'] ='octopus-done'

    # TODO use 'n in self.repo' when we require hg 1.5
    def repo_contains(n):
        try:
            return bool(self.repo.lookup(n))
        except error.RepoLookupError:
            return False

    if not (repo_contains(p1) and repo_contains(p2)):
        raise hgutil.Abort(_('you appear to have run strip - '
                             'please run hg git-cleanup'))

    ctx = context.memctx(self.repo, (p1, p2), text,
                         list(files) + findconvergedfiles(p1, p2),
                         getfilectx, author, date, extra)

    node = self.repo.commitctx(ctx)

    self.swap_out_encoding(oldenc)

    # save changeset to mapping file
    cs = hex(node)
    self.map_set(commit.id, cs)
def _test_roundtrip(self, base, target):
    """Assert that applying create_delta(base, target) to *base*
    reconstructs *target* exactly.
    """
    # apply_delta returns the reconstructed data as a list of chunks and
    # makes no guarantee it comes back as a single chunk, so comparing
    # against [target] is brittle. Join the chunks before comparing, and
    # use assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(
        target, ''.join(apply_delta(base, create_delta(base, target))))
def import_git_commit(self, commit):
    """Convert a git commit into a Mercurial changeset and record the
    git SHA -> hg changeset mapping.

    Reconstructs the commit message/author (including hg metadata that
    was round-tripped through git), builds a memctx from the changed
    files, handles octopus merges by chaining two-parent commits, and
    aborts if a parent changeset is missing from the local repo.
    """
    self.ui.debug(_("importing: %s\n") % commit.id)
    (strip_message, hg_renames,
     hg_branch, extra) = self.extract_hg_metadata(commit.message)

    # get a list of the changed, added, removed files
    files = self.get_files_changed(commit)

    date = (commit.author_time, -commit.author_timezone)
    text = strip_message

    # Normalize the message; if normalization changed it, store a delta
    # so the exact original git message can be recovered on export.
    origtext = text
    try:
        text.decode('utf-8')
    except UnicodeDecodeError:
        text = self.decode_guess(text, commit.encoding)

    text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
    if text + '\n' != origtext:
        extra['message'] = create_delta(text + '\n', origtext)

    author = commit.author

    # convert extra data back to the end
    if ' ext:' in commit.author:
        regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$')
        m = regex.match(commit.author)
        if m:
            name = m.group(1)
            ex = urllib.unquote(m.group(2))
            email = m.group(3)
            author = name + ' <' + email + '>' + ex

    # '<none@none>' is the placeholder used when the hg author had no
    # email; strip the 12-character suffix to restore the bare name.
    if ' <none@none>' in commit.author:
        author = commit.author[:-12]

    try:
        author.decode('utf-8')
    except UnicodeDecodeError:
        origauthor = author
        author = self.decode_guess(author, commit.encoding)
        extra['author'] = create_delta(author, origauthor)

    oldenc = self.swap_out_encoding()

    def getfilectx(repo, memctx, f):
        # Build a memfilectx for a file reported as changed by git.
        delete, mode, sha = files[f]
        if delete:
            # IOError tells memctx the file was removed in this commit.
            raise IOError

        data = self.git[sha].data
        copied_path = hg_renames.get(f)
        e = self.convert_git_int_mode(mode)

        return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

    gparents = map(self.map_hg_get, commit.parents)
    p1, p2 = (nullid, nullid)
    octopus = False

    if len(gparents) > 1:
        # merge, possibly octopus
        def commit_octopus(p1, p2):
            # Collapse one pair of parents into an intermediate changeset,
            # tagged so export can reassemble the octopus merge.
            ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                                 getfilectx, author, date,
                                 {'hg-git': 'octopus'})
            return hex(self.repo.commitctx(ctx))

        octopus = len(gparents) > 2
        p2 = gparents.pop()
        p1 = gparents.pop()
        while len(gparents) > 0:
            p2 = commit_octopus(p1, p2)
            p1 = gparents.pop()
    else:
        if gparents:
            p1 = gparents.pop()

    pa = None
    # NOTE(review): `pa` (common ancestor) is computed but never used below.
    if not (p2 == nullid):
        node1 = self.repo.changectx(p1)
        node2 = self.repo.changectx(p2)
        pa = node1.ancestor(node2)

    # if named branch, add to extra
    if hg_branch:
        extra['branch'] = hg_branch

    # if committer is different than author, add it to extra
    if commit.author != commit.committer \
            or commit.author_time != commit.commit_time \
            or commit.author_timezone != commit.commit_timezone:
        extra['committer'] = "%s %d %d" % (
            commit.committer, commit.commit_time, -commit.commit_timezone)

    if commit.encoding:
        extra['encoding'] = commit.encoding

    # NOTE(review): duplicate of the hg_branch assignment above; the
    # second write is redundant.
    if hg_branch:
        extra['branch'] = hg_branch

    if octopus:
        extra['hg-git'] = 'octopus-done'

    # TODO use 'n in self.repo' when we require hg 1.5
    def repo_contains(n):
        try:
            return bool(self.repo.lookup(n))
        except error.RepoLookupError:
            return False

    if not (repo_contains(p1) and repo_contains(p2)):
        raise hgutil.Abort(
            _('you appear to have run strip - '
              'please run hg git-cleanup'))

    ctx = context.memctx(self.repo, (p1, p2), text, list(files),
                         getfilectx, author, date, extra)

    node = self.repo.commitctx(ctx)

    self.swap_out_encoding(oldenc)

    # save changeset to mapping file
    cs = hex(node)
    self.map_set(commit.id, cs)
def import_git_commit(self, commit):
    """Convert a git commit into a Mercurial changeset and record the
    git SHA -> hg changeset mapping.

    This variant additionally translates git submodules (gitlinks and
    .gitmodules) into Mercurial subrepos (.hgsubstate/.hgsub), and joins
    up "converged" files so merge filelog histories connect correctly.
    """
    self.ui.debug(_("importing: %s\n") % commit.id)
    (strip_message, hg_renames,
     hg_branch, extra) = self.extract_hg_metadata(commit.message)

    # get a list of the changed, added, removed files
    files = self.get_files_changed(commit)

    # Handle gitlinks: collect
    gitlinks = self.collect_gitlinks(commit.tree)
    git_commit_tree = self.git[commit.tree]

    # Analyze hgsubstate and build an updated version
    # using SHAs from gitlinks
    hgsubstate = None
    if gitlinks:
        hgsubstate = util.parse_hgsubstate(
            self.git_file_readlines(git_commit_tree, '.hgsubstate'))
        for path, sha in gitlinks:
            hgsubstate[path] = sha
        # in case .hgsubstate wasn't among changed files
        # force its inclusion
        files['.hgsubstate'] = (False, 0100644, None)

    # Analyze .hgsub and merge with .gitmodules
    hgsub = None
    gitmodules = self.parse_gitmodules(git_commit_tree)
    if gitmodules or gitlinks:
        hgsub = util.parse_hgsub(
            self.git_file_readlines(git_commit_tree, '.hgsub'))
        for (sm_path, sm_url, sm_name) in gitmodules:
            hgsub[sm_path] = '[git]' + sm_url
        files['.hgsub'] = (False, 0100644, None)
    elif (commit.parents and
          '.gitmodules' in self.git[self.git[commit.parents[0]].tree]):
        # no .gitmodules in this commit, however present in the parent
        # mark its hg counterpart as deleted (assuming .hgsub is there
        # due to the same import_git_commit process
        files['.hgsub'] = (True, 0100644, None)

    date = (commit.author_time, -commit.author_timezone)
    text = strip_message

    # Normalize the message; if normalization changed it, store a delta
    # so the exact original git message can be recovered on export.
    origtext = text
    try:
        text.decode('utf-8')
    except UnicodeDecodeError:
        text = self.decode_guess(text, commit.encoding)

    text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n')
    if text + '\n' != origtext:
        extra['message'] = create_delta(text +'\n', origtext)

    author = commit.author

    # convert extra data back to the end
    if ' ext:' in commit.author:
        m = RE_GIT_AUTHOR_EXTRA.match(commit.author)
        if m:
            name = m.group(1)
            ex = urllib.unquote(m.group(2))
            email = m.group(3)
            author = name + ' <' + email + '>' + ex

    # '<none@none>' is the placeholder used when the hg author had no
    # email; strip the 12-character suffix to restore the bare name.
    if ' <none@none>' in commit.author:
        author = commit.author[:-12]

    try:
        author.decode('utf-8')
    except UnicodeDecodeError:
        origauthor = author
        author = self.decode_guess(author, commit.encoding)
        extra['author'] = create_delta(author, origauthor)

    oldenc = self.swap_out_encoding()

    def findconvergedfiles(p1, p2):
        # If any files have the same contents in both parents of a merge
        # (and are therefore not reported as changed by Git) but are at
        # different file revisions in Mercurial (because they arrived at
        # those contents in different ways), we need to include them in
        # the list of changed files so that Mercurial can join up their
        # filelog histories (same as if the merge was done in Mercurial to
        # begin with).
        if p2 == nullid:
            return []
        manifest1 = self.repo.changectx(p1).manifest()
        manifest2 = self.repo.changectx(p2).manifest()
        return [path for path, node1 in manifest1.iteritems()
                if path not in files and manifest2.get(path, node1) != node1]

    def getfilectx(repo, memctx, f):
        info = files.get(f)
        if info != None:
            # it's a file reported as modified from Git
            delete, mode, sha = info
            if delete:
                # IOError tells memctx the file was removed in this commit.
                raise IOError

            if not sha: # indicates there's no git counterpart
                e = ''
                copied_path = None
                if '.hgsubstate' == f:
                    data = util.serialize_hgsubstate(hgsubstate)
                elif '.hgsub' == f:
                    data = util.serialize_hgsub(hgsub)
            else:
                data = self.git[sha].data
                copied_path = hg_renames.get(f)
                e = self.convert_git_int_mode(mode)
        else:
            # it's a converged file
            fc = context.filectx(self.repo, f, changeid=memctx.p1().rev())
            data = fc.data()
            e = fc.flags()
            copied_path = fc.renamed()

        return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path)

    gparents = map(self.map_hg_get, commit.parents)
    p1, p2 = (nullid, nullid)
    octopus = False

    if len(gparents) > 1:
        # merge, possibly octopus
        def commit_octopus(p1, p2):
            # Collapse one pair of parents into an intermediate changeset,
            # tagged so export can reassemble the octopus merge.
            ctx = context.memctx(self.repo, (p1, p2), text,
                                 list(files) + findconvergedfiles(p1, p2),
                                 getfilectx, author, date,
                                 {'hg-git': 'octopus'})
            return hex(self.repo.commitctx(ctx))

        octopus = len(gparents) > 2
        p2 = gparents.pop()
        p1 = gparents.pop()
        while len(gparents) > 0:
            p2 = commit_octopus(p1, p2)
            p1 = gparents.pop()
    else:
        if gparents:
            p1 = gparents.pop()

    pa = None
    # NOTE(review): `pa` (common ancestor) is computed but never used below.
    if not (p2 == nullid):
        node1 = self.repo.changectx(p1)
        node2 = self.repo.changectx(p2)
        pa = node1.ancestor(node2)

    # if named branch, add to extra
    if hg_branch:
        extra['branch'] = hg_branch

    # if committer is different than author, add it to extra
    if commit.author != commit.committer \
            or commit.author_time != commit.commit_time \
            or commit.author_timezone != commit.commit_timezone:
        extra['committer'] = "%s %d %d" % (
            commit.committer, commit.commit_time, -commit.commit_timezone)

    if commit.encoding:
        extra['encoding'] = commit.encoding

    # NOTE(review): duplicate of the hg_branch assignment above; the
    # second write is redundant.
    if hg_branch:
        extra['branch'] = hg_branch

    if octopus:
        extra['hg-git'] ='octopus-done'

    # TODO use 'n in self.repo' when we require hg 1.5
    def repo_contains(n):
        try:
            return bool(self.repo.lookup(n))
        except error.RepoLookupError:
            return False

    if not (repo_contains(p1) and repo_contains(p2)):
        raise hgutil.Abort(_('you appear to have run strip - '
                             'please run hg git-cleanup'))

    ctx = context.memctx(self.repo, (p1, p2), text,
                         list(files) + findconvergedfiles(p1, p2),
                         getfilectx, author, date, extra)

    node = self.repo.commitctx(ctx)

    self.swap_out_encoding(oldenc)

    # save changeset to mapping file
    cs = hex(node)
    self.map_set(commit.id, cs)