committer_email = email
committer_name = name
message = b'a'
# Parents of the commit, as ASCII hex SHAs.
parents = ()

# Blob: save it, then check that our SHA matches the one Git computes.
blob_sha_ascii, blob_sha = util.save_object(b'blob', blob_content)
blob_sha_git = util.get_git_hash_object(b'blob', blob_content)
assert blob_sha_ascii == blob_sha_git

# Tree: a single entry for the blob above, again checked against Git.
tree_sha_ascii, tree_sha, tree_content = util.save_tree_object(
    blob_mode,
    blob_basename,
    blob_sha,
)
tree_sha_git = util.get_git_hash_object(b'tree', tree_content)
assert tree_sha_ascii == tree_sha_git

# Commit: points at the tree, checked against Git in the same way.
commit_sha_ascii, commit_sha, commit_content = util.save_commit_object(
    tree_sha_ascii,
    parents,
    author_name,
    author_email,
    author_date,
    committer_name,
    committer_email,
    committer_date,
    message,
)
commit_sha_git = util.get_git_hash_object(b'commit', commit_content)
assert commit_sha_ascii == commit_sha_git

# Finish.
util.create_master(commit_sha_ascii)
util.clone()
email = b'*****@*****.**'
name = b''

util.init()
tree = util.create_tree_with_one_file()

# Create n commits in sequence, each one passing the previously created
# commit as its only parent (None for the very first one).
commit = None
n = 1000000
for i in range(n):
    # All dates are fixed at 0, so no per-iteration timestamp is needed.
    commit, _, _ = util.save_commit_object(
        tree,
        (commit,),
        author_date_s=0,
        author_email=email,
        author_name=name,
        committer_date_s=0,
        committer_email=email,
        committer_name=name,
        message=b'',
    )
    if i % 100000 == 0:
        # NOTE(review): the nesting of this checkpoint block was
        # reconstructed from a flattened source -- confirm that the
        # repack / prune-packed / tag calls belong inside this branch.
        print(i)
        print(datetime.datetime.now())

        # Loose objects are too large and blow up the tmpfs.

        # `git gc` does clean the packs, but the calculation takes more and
        # more memory, and slows down and blows up at the end.
        # TODO which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])

        subprocess.check_output(['git', 'repack'])
        subprocess.check_output(['git', 'prune-packed'])
        subprocess.check_output(['git', 'tag', str(i), commit])
tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Checkpoint interval. Floor division keeps it an int, so the modulo test
# below is exact integer arithmetic instead of float arithmetic.
percent = n // 100
p = 0
# NOTE(review): `i` is not advanced anywhere in the visible portion of this
# script -- confirm that it is incremented once per commit further down.
i = 0
data_paths = sorted(os.listdir(data_dir_path))
for data_path in data_paths:
    data_path = os.path.join(data_dir_path, data_path)
    with open(data_path, 'rb') as f:
        # One commit per input line; the line (stripped of trailing
        # whitespace and capped at 255 bytes) is used as the email.
        for line in f:
            email = line.rstrip()[:255]
            commit, _, _ = util.save_commit_object(
                tree,
                (commit,),
                author_email=email,
                committer_email=email,
            )
            if i % percent == 0:
                print(p)
                print(email)
                print(datetime.datetime.now())
                p += 1

                # Loose objects are too large and blow up the tmpfs.

                # `git gc` does clean the packs, but the calculation takes
                # more and more memory, and slows down and blows up at the
                # end. TODO which subcommand blows up exactly?
                #subprocess.check_output(['git', 'gc'])

                subprocess.check_output(['git', 'repack'])
util.init()
tree = util.create_tree_with_one_file()

# Create n commits in sequence, each one passing the previously created
# commit as its only parent (None for the very first one).
commit = None
n = 1000000
# Checkpoint interval. Floor division keeps it an int, so the modulo test
# below is exact integer arithmetic instead of float arithmetic.
percent = n // 100
p = 0
for i in range(n):
    # Author and committer dates both use the current wall-clock second.
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (commit,),
        author_date_s=now,
        author_email=email,
        author_name=name,
        committer_date_s=now,
        committer_email=email,
        committer_name=name,
        # The commit message is the loop index in ASCII.
        message=(str(i).encode('ascii')),
    )
    if i % percent == 0:
        print(p)
        print(datetime.datetime.now())
        p += 1

        # Loose objects are too large and blow up the tmpfs.

        # `git gc` does clean the packs, but the calculation takes more and
        # more memory, and slows down and blows up at the end.
        # TODO which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])
tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Progress interval as an int (floor division): the `%` check below then
# compares integers rather than relying on float modulo.
percent = n // 100
p = 0
# NOTE(review): nothing in the visible part of the script increments `i`;
# verify that a later statement advances it once per commit.
i = 0
data_paths = sorted(os.listdir(data_dir_path))
for data_path in data_paths:
    data_path = os.path.join(data_dir_path, data_path)
    with open(data_path, 'rb') as f:
        for line in f:
            # Each line becomes the email of one commit, with trailing
            # whitespace removed and the length capped at 255 bytes.
            email = line.rstrip()[:255]
            commit, _, _ = util.save_commit_object(
                tree,
                (commit,),
                author_email=email,
                committer_email=email,
            )
            if i % percent == 0:
                # Progress report, then repack.
                print(p)
                print(email)
                print(datetime.datetime.now())
                p += 1

                # Loose objects are too large and blow up the tmpfs.

                # `git gc` does clean the packs, but the calculation takes
                # more and more memory, and slows down and blows up at the
                # end. TODO which subcommand blows up exactly?
                #subprocess.check_output(['git', 'gc'])

                subprocess.check_output(['git', 'repack'])
name = b'' util.init() tree = util.create_tree_with_one_file() commit = None if len(sys.argv) > 1: n = int(sys.argv[1]) else: n = 1000 base_commit, _, _ = util.save_commit_object( tree, (commit,), author_date_s=0, author_email=email, author_name=name, committer_date_s=0, committer_email=email, committer_name=name, message=b'', ) parents = [] for i in range(n): now = int(time.time()) commit, _, _ = util.save_commit_object( tree, (base_commit,), author_date_s=i, author_email=email, author_name=name, committer_date_s=0,
#!/usr/bin/env python3

"""
Git does not let a commit have twice the same parent, but GitHub does,
and normally shows it.

But as of 2016-05-17 they didn't page this edge case,
and it 502's the commit for large numbers of links.

If you increase the value a lot, when you clone and cd into the repo,
your computer may bog down if you show a git status on the bash,
because Git memory explodes trying to parse that.

Actual compressed size is very small though,
since gzip compresses all that repeated data very efficiently.

push would require an obscene amount of memory (malloc fails on `ulimit -Sv`),
so I couldn't test it.
"""

import itertools

import util

util.init()
tree = util.create_tree_with_one_file()

# First commit, created without passing any parents.
commit, _, _ = util.save_commit_object(tree, author_name=b'a')

# Second commit: the first commit is passed as a parent 10 million times.
# itertools.repeat yields the same value lazily instead of building a list.
commit, _, _ = util.save_commit_object(
    tree,
    itertools.repeat(commit, 10000000),
    author_name=b'b',
)

# Finish.
util.create_master(commit)
util.clone()
import time
import sys

import util

email = b'*****@*****.**'
name = b''

util.init()
tree = util.create_tree_with_one_file()

# A single commit whose parent tuple holds only None, with all dates at 0
# and an empty message.
commit, _, _ = util.save_commit_object(
    tree,
    (None,),
    author_date_s=0,
    author_email=email,
    author_name=name,
    committer_date_s=0,
    committer_email=email,
    committer_name=name,
    message=b'',
)

# Save a tag object for that commit under the name b'mytag', reusing the
# same name/email identity as the commit.
tag_sha, _, _ = util.save_tag_object(
    commit,
    b'mytag',
    object_type=b'commit',
    user_name=name,
    user_email=email,
    date_s=0,
    message=b'abc',
)

util.create_master(commit)
util.init()
tree = util.create_tree_with_one_file()

# Build a chain of n commits; every new commit receives the previous one as
# its single parent, starting from None.
commit = None
n = 1000000
# Progress interval as an int (floor division): the `%` check below then
# compares integers rather than relying on float modulo.
percent = n // 100
p = 0
for i in range(n):
    # Current time in whole seconds, used for both author and committer.
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (commit,),
        author_date_s=now,
        author_email=email,
        author_name=name,
        committer_date_s=now,
        committer_email=email,
        committer_name=name,
        # Message is the ASCII-encoded loop index.
        message=(str(i).encode('ascii')),
    )
    if i % percent == 0:
        # Progress report.
        print(p)
        print(datetime.datetime.now())
        p += 1

        # Loose objects are too large and blow up the tmpfs.

        # `git gc` does clean the packs, but the calculation takes more and
        # more memory, and slows down and blows up at the end.
        # TODO which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])
name = b'' util.init() tree = util.create_tree_with_one_file() commit = None if len(sys.argv) > 1: n = int(sys.argv[1]) else: n = 1000 base_commit, _, _ = util.save_commit_object( tree, (commit, ), author_date_s=0, author_email=email, author_name=name, committer_date_s=0, committer_email=email, committer_name=name, message=b'', ) parents = [] for i in range(n): now = int(time.time()) commit, _, _ = util.save_commit_object( tree, (base_commit, ), author_date_s=i, author_email=email, author_name=name, committer_date_s=0,