# The committer identity reuses the existing `email` / `name` values.
committer_name = name
committer_email = email
message = b'a'
# Parents are given as ASCII hex; this commit is created with an empty tuple.
parents = ()

# Blob: store it, then compare the resulting SHA with what Git reports.
blob_sha_ascii, blob_sha = util.save_object(b'blob', blob_content)
git_blob_sha = util.get_git_hash_object(b'blob', blob_content)
assert blob_sha_ascii == git_blob_sha

# Tree: built from the blob's mode, basename and SHA, then checked against Git.
tree_sha_ascii, tree_sha, tree_content = util.save_tree_object(
    blob_mode,
    blob_basename,
    blob_sha,
)
git_tree_sha = util.get_git_hash_object(b'tree', tree_content)
assert tree_sha_ascii == git_tree_sha

# Commit: references the tree by its ASCII SHA, then checked against Git.
commit_sha_ascii, commit_sha, commit_content = util.save_commit_object(
    tree_sha_ascii,
    parents,
    author_name,
    author_email,
    author_date,
    committer_name,
    committer_email,
    committer_date,
    message,
)
git_commit_sha = util.get_git_hash_object(b'commit', commit_content)
assert commit_sha_ascii == git_commit_sha

# Finish.
util.create_master(commit_sha_ascii)
util.clone()
Ejemplo n.º 2
0
# Identity used for both the author and the committer of every commit below.
email = b'*****@*****.**'
name = b''

util.init()

tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Each iteration creates a commit whose only parent entry is the commit from
# the previous iteration (None on the first pass). All dates are fixed at 0,
# so no per-iteration timestamp is needed.
for i in range(n):
    commit, _, _ = util.save_commit_object(
        tree,
        (commit, ),
        author_date_s=0,
        author_email=email,
        author_name=name,
        committer_date_s=0,
        committer_email=email,
        committer_name=name,
        message=b'',
    )
    if i % 100000 == 0:
        # Progress output every 100000 commits.
        print(i)
        print(datetime.datetime.now())
        # Loose objects are too large and blow up the tmpfs.
        # `git gc` does clean up, but its calculation takes more and more memory,
        # slows down, and blows up at the end. TODO: which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])
        subprocess.check_output(['git', 'repack'])
        subprocess.check_output(['git', 'prune-packed'])
        # Tag the current commit with the iteration number.
        subprocess.check_output(['git', 'tag', str(i), commit])
tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Floor division keeps the `i % percent` progress check in integer arithmetic;
# `n / 100` would be a float in Python 3.
percent = n // 100
p = 0
i = 0

# Walk the data files in sorted name order and create one commit per line.
# Each commit's parent entry is the previously created commit (None at first).
data_paths = sorted(os.listdir(data_dir_path))
for data_path in data_paths:
    data_path = os.path.join(data_dir_path, data_path)
    with open(data_path, 'rb') as f:
        for line in f:
            # The line, minus trailing whitespace and capped at 255 bytes,
            # is used as both the author and the committer email.
            email = line.rstrip()[:255]
            commit, _, _ = util.save_commit_object(
                tree,
                (commit, ),
                author_email=email,
                committer_email=email,
            )
            # NOTE(review): `i` is not advanced anywhere in this excerpt; unless
            # it is incremented further down, this branch runs on every line -- confirm.
            if i % percent == 0:
                print(p)
                print(email)
                print(datetime.datetime.now())
                p += 1

                # Loose objects are too large and blow up the tmpfs.

                # `git gc` does clean up, but its calculation takes more and more memory,
                # slows down, and blows up at the end. TODO: which subcommand blows up exactly?
                #subprocess.check_output(['git', 'gc'])

                subprocess.check_output(['git', 'repack'])
Ejemplo n.º 4
0
util.init()

tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Floor division keeps the `i % percent` progress check in integer arithmetic;
# `n / 100` would be a float in Python 3.
percent = n // 100
p = 0
# Each iteration creates a commit whose parent entry is the previous commit
# (None on the first pass), dated with the current time and carrying the
# iteration number as its message.
for i in range(n):
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (commit,),
        author_date_s=now,
        author_email=email,
        author_name=name,
        committer_date_s=now,
        committer_email=email,
        committer_name=name,
        message=str(i).encode('ascii'),
    )
    if i % percent == 0:
        # Progress output once every `percent` commits.
        print(p)
        print(datetime.datetime.now())
        p += 1

        # Loose objects are too large and blow up the tmpfs.

        # `git gc` does clean up, but its calculation takes more and more memory,
        # slows down, and blows up at the end. TODO: which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])
tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Use floor division so the step is an int; in Python 3 `n / 100` is a float
# and the progress check below would run on float modulo.
percent = n // 100
p = 0
i = 0

# Every line of every data file (files visited in sorted name order) becomes
# one commit whose parent entry is the commit created just before it.
data_paths = sorted(os.listdir(data_dir_path))
for data_path in data_paths:
    data_path = os.path.join(data_dir_path, data_path)
    with open(data_path, 'rb') as f:
        for line in f:
            # Trailing whitespace is stripped and the value is cut to 255 bytes
            # before being used as author and committer email.
            email = line.rstrip()[:255]
            commit, _, _ = util.save_commit_object(
                tree,
                (commit,),
                author_email=email,
                committer_email=email,
            )
            # NOTE(review): no increment of `i` is visible in this excerpt; if
            # none exists further down, this condition is true for every line -- confirm.
            if i % percent == 0:
                print(p)
                print(email)
                print(datetime.datetime.now())
                p += 1

                # Loose objects are too large and blow up the tmpfs.

                # `git gc` does clean up, but its calculation takes more and more memory,
                # slows down, and blows up at the end. TODO: which subcommand blows up exactly?
                #subprocess.check_output(['git', 'gc'])

                subprocess.check_output(['git', 'repack'])
name = b''

util.init()

tree = util.create_tree_with_one_file()
commit = None
# Iteration count for the loop that follows: first CLI argument if present, else 1000.
n = int(sys.argv[1]) if len(sys.argv) > 1 else 1000
# Base commit; its parents tuple holds `commit`, which is still None here.
base_commit, _, _ = util.save_commit_object(
    tree,
    (commit,),
    author_name=name,
    author_email=email,
    author_date_s=0,
    committer_name=name,
    committer_email=email,
    committer_date_s=0,
    message=b'',
)
parents = []
for i in range(n):
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (base_commit,),
        author_date_s=i,
        author_email=email,
        author_name=name,
        committer_date_s=0,
#!/usr/bin/env python3

"""
Git does not let a commit have twice the same parent, but GitHub does, and normally shows it.
But as of 2016-05-17 they didn't page this edge case, and it 502's the commit for large numbers of links.

If you increase the value a lot, then when you clone and cd into the repo, your computer may bog down
if you show a git status in Bash, because Git's memory usage explodes trying to parse that.
Actual compressed size is very small though, since gzip compresses all that repeated data very efficiently.

A push would require an obscene amount of memory (malloc fails under `ulimit -Sv`), so I couldn't test it.
"""

import itertools

import util

util.init()

tree = util.create_tree_with_one_file()
# First commit, authored as b'a'; it is the one reused as a parent below.
first_commit, _, _ = util.save_commit_object(tree, author_name=b'a')
# Second commit, authored as b'b': its parents argument is an iterator that
# yields the first commit 10000000 times.
repeated_parents = itertools.repeat(first_commit, 10000000)
commit, _, _ = util.save_commit_object(tree, repeated_parents, author_name=b'b')

# Finish.
util.create_master(commit)
util.clone()
import time
import sys

import util

# Identity shared by the commit (author and committer) and the tag.
email = b'*****@*****.**'
name = b''

util.init()
tree = util.create_tree_with_one_file()
# One commit whose parents tuple contains only None; all dates are 0.
commit, _, _ = util.save_commit_object(
    tree,
    (None,),
    author_name=name,
    author_email=email,
    author_date_s=0,
    committer_name=name,
    committer_email=email,
    committer_date_s=0,
    message=b'',
)
# Tag object b'mytag' created for that commit (object_type=b'commit').
tag_sha, _, _ = util.save_tag_object(
    commit,
    b'mytag',
    object_type=b'commit',
    user_name=name,
    user_email=email,
    date_s=0,
    message=b'abc',
)
util.create_master(commit)
Ejemplo n.º 9
0
#!/usr/bin/env python3
"""
Git does not let a commit have twice the same parent, but GitHub does, and normally shows it.
But as of 2016-05-17 they didn't page this edge case, and it 502's the commit for large numbers of links.

If you increase the value a lot, then when you clone and cd into the repo, your computer may bog down
if you show a git status in Bash, because Git's memory usage explodes trying to parse that.
Actual compressed size is very small though, since gzip compresses all that repeated data very efficiently.

A push would require an obscene amount of memory (malloc fails under `ulimit -Sv`), so I couldn't test it.
"""

import itertools

import util

util.init()

tree = util.create_tree_with_one_file()
# How many times the same commit is listed as a parent of the final commit.
parent_count = 10000000
# Commit authored as b'a'; this is the commit that gets repeated.
parent_commit, _, _ = util.save_commit_object(tree, author_name=b'a')
# Commit authored as b'b', given `parent_commit` repeated `parent_count` times
# through a lazy itertools.repeat iterator.
commit, _, _ = util.save_commit_object(
    tree,
    itertools.repeat(parent_commit, parent_count),
    author_name=b'b',
)

# Finish.
util.create_master(commit)
util.clone()
Ejemplo n.º 10
0
util.init()

tree = util.create_tree_with_one_file()
commit = None
n = 1000000
# Use floor division so the step is an int; in Python 3 `n / 100` is a float
# and the progress check below would run on float modulo.
percent = n // 100
p = 0
# Build the commits one after another: the parent entry is the commit from the
# previous iteration (None at first), both dates are the current time, and the
# message is the iteration number in ASCII.
for i in range(n):
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (commit, ),
        author_date_s=now,
        author_email=email,
        author_name=name,
        committer_date_s=now,
        committer_email=email,
        committer_name=name,
        message=str(i).encode('ascii'),
    )
    if i % percent == 0:
        # Progress output once every `percent` commits.
        print(p)
        print(datetime.datetime.now())
        p += 1

        # Loose objects are too large and blow up the tmpfs.

        # `git gc` does clean up, but its calculation takes more and more memory,
        # slows down, and blows up at the end. TODO: which subcommand blows up exactly?
        #subprocess.check_output(['git', 'gc'])
name = b''

util.init()

tree = util.create_tree_with_one_file()
commit = None
# Loop count: taken from the first command-line argument when one is given,
# otherwise 1000.
n = 1000
if len(sys.argv) > 1:
    n = int(sys.argv[1])
# Base commit; `commit` is still None when it is passed in the parents tuple.
base_commit, _, _ = util.save_commit_object(
    tree,
    (commit, ),
    message=b'',
    author_name=name,
    author_email=email,
    author_date_s=0,
    committer_name=name,
    committer_email=email,
    committer_date_s=0,
)
parents = []
for i in range(n):
    now = int(time.time())
    commit, _, _ = util.save_commit_object(
        tree,
        (base_commit, ),
        author_date_s=i,
        author_email=email,
        author_name=name,
        committer_date_s=0,