Ejemplo n.º 1
0
def pull(repo: pygit2.Repository, remote_name: str = 'origin'):
    """
    Pull changes for *remote_name* into the local master branch.

    Fetches from the named remote, then either fast-forwards local master
    or creates a merge commit, mirroring a plain ``git pull``.

    :param repo: the repository to pull
    :param remote_name: name of the remote to pull from
    :return: None
    :raises AssertionError: if the merge produced conflicts or the merge
        analysis result is unknown
    """
    for remote in repo.remotes:
        if remote.name != remote_name:
            continue
        remote.fetch()
        # BUGFIX: the remote ref was previously hard-coded to 'origin',
        # silently ignoring remote_name; honour the parameter (the branch
        # is still fixed to 'master').
        remote_master_id = repo.lookup_reference(
            'refs/remotes/%s/master' % remote_name).target
        merge_result, _ = repo.merge_analysis(remote_master_id)
        # Up to date, do nothing
        if merge_result & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE:
            return
        # We can just fastforward
        elif merge_result & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD:
            repo.checkout_tree(repo.get(remote_master_id))
            master_ref = repo.lookup_reference('refs/heads/master')
            master_ref.set_target(remote_master_id)
            repo.head.set_target(remote_master_id)
        elif merge_result & pygit2.GIT_MERGE_ANALYSIS_NORMAL:
            repo.merge(remote_master_id)
            # BUGFIX: removed leftover debug print of the conflict list.
            assert repo.index.conflicts is None, 'Conflicts, ahhhh!'
            user = repo.default_signature
            tree = repo.index.write_tree()
            repo.create_commit('HEAD', user, user, 'Merge!', tree,
                               [repo.head.target, remote_master_id])
            repo.state_cleanup()
        else:
            raise AssertionError('Unknown merge analysis result')
        # The matching remote has been handled; no need to scan the rest.
        return
Ejemplo n.º 2
0
def git_is_clean(srcdir, project):
    """Return a ``(is_clean, reason)`` tuple describing whether the
    project's git checkout is pristine with respect to its upstream.

    ``reason`` is the empty string when the repository is clean.
    """
    repo = Repository(os.path.join(srcdir, project.workspace_path, ".git"))
    # Any file whose status is neither "ignored" nor "current" counts as
    # uncommitted work.
    for _, b in iteritems(repo.status()):
        if b != GIT_STATUS_IGNORED and b != GIT_STATUS_CURRENT:
            return False, "has uncommitted changes"
    if repo.head_is_detached:
        return False, "has detached HEAD"
    origin = get_origin(repo, project)  # helper defined elsewhere in this project
    if not origin:
        return False, "has no upstream remote"
    # Collect the commit ids of the origin's branches, and map every direct
    # (OID) local ref to its name, keyed by the commit it points at.
    remote_refs = []
    local_refs = {}
    for refname in repo.listall_references():
        if refname.startswith("refs/remotes/%s/" % origin.name):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                remote_refs.append(ref.target)
        elif not refname.startswith("refs/remotes/"):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                local_refs[ref.peel().id] = refname
    if not remote_refs:
        return False, "has no upstream remote branches"
    if not local_refs:
        return False, "has no local branches"
    if not repo.lookup_branch("%s/%s" % (origin.name, project.master_branch), GIT_BRANCH_REMOTE):
        return False, "has no upstream master branch"
    # Drop every local ref that is reachable from some remote branch;
    # whatever remains points at commits that were never pushed upstream.
    for remote_ref in remote_refs:
        for commit in repo.walk(remote_ref):
            if commit.id in local_refs:
                del local_refs[commit.id]
    if local_refs:
        return False, "has local commits: %s" % ", ".join(["'%s'" % name for _, name in iteritems(local_refs)])
    return True, ""
Ejemplo n.º 3
0
def pull_repo(repo_path: Path):
    """Update a repository at repo_path by pulling from the remote named origin."""
    repository = Repository(repo_path)
    repository.remotes['origin'].fetch()
    remote_head = repository.lookup_reference('refs/remotes/origin/master').target
    analysis, _ = repository.merge_analysis(remote_head)

    # Already up to date -- nothing to do.
    if analysis & GIT_MERGE_ANALYSIS_UP_TO_DATE:
        return

    if analysis & GIT_MERGE_ANALYSIS_FASTFORWARD:
        # Fast-forward: move the working tree, the branch ref and HEAD.
        repository.checkout_tree(repository.get(remote_head))
        branch_ref = repository.lookup_reference('refs/heads/master')
        branch_ref.set_target(remote_head)
        repository.head.set_target(remote_head)
    elif analysis & GIT_MERGE_ANALYSIS_NORMAL:
        # True merge: bail out on conflicts, otherwise commit the result.
        repository.merge(remote_head)
        assert repository.index.conflicts is None, \
            'Merge conflicts, please manually fix'
        merged_tree = repository.index.write_tree()
        repository.create_commit('refs/heads/master', SIGNATURE, SIGNATURE,
                                 '[build-server]: Merge', merged_tree,
                                 [repository.head.target, remote_head])
        repository.state_cleanup()
Ejemplo n.º 4
0
    def launch(self):
        """Bring the checkout up to date and start the application.

        Locates the repo at REPO_PATH, fetches origin, hard-resets to
        origin/master, then spawns EXE_PATH.  Only a failure to locate the
        repository aborts; fetch/reset failures are reported via
        ``self._error`` but the launch still proceeds (best effort).
        ``self.destroy()`` runs only when the process starts successfully
        (presumably closing a launcher window -- TODO confirm).
        """
        # init repo
        self._info('Locating the repository..')
        try:
            repo = Repository(REPO_PATH)
        except GitError:
            self._error('Failed to locate the repository!')
            return

        # fetch
        self._info('Fetching repository..')
        try:
            repo.remotes['origin'].fetch()
        except GitError:
            self._error('Failed to fetch the repository!')

        # reset repository
        self._info('Resetting the repository..')
        try:
            repo.reset(
                repo.lookup_reference('refs/remotes/origin/master').target, GIT_RESET_HARD)
        except GitError:
            self._error('Failed to reset the repository!')

        # launch
        self._info('Launching..')
        try:
            subprocess.Popen(EXE_PATH, cwd=EXE_WORKDIR)
        except OSError:
            self._error('Failed to launch!')
        else:
            self.destroy()
Ejemplo n.º 5
0
def sync_handler(fork_from: str, from_sha: str, repo_name: str,
                 ticket_id: int, pr_url: str):
    """Check out the requested commit or PR head in a local work tree and
    run flake8 over it.

    :param fork_from: ``owner/repo`` the work tree is cloned from
    :param from_sha: commit sha to check out
    :param repo_name: ``owner/repo`` naming the remote to sync from
    :param ticket_id: pull-request number, used for ``refs/pull/<id>/head``
    :param pr_url: pull-request URL; its path determines the output file
    :return: path of the flake8 output file under WORK_DIR
    """
    output_path = '{}.txt'.format(pr_url.split('/', 3)[3].rsplit('/', 2)[0])
    output_path = os.path.join(WORK_DIR, output_path.replace('/', '_'))
    work_tree = os.path.join(WORK_DIR, fork_from)
    parent_path = os.path.dirname(work_tree)
    if not os.path.exists(parent_path):
        os.makedirs(parent_path)
    # Clone on first use, reuse the existing work tree afterwards.
    if not os.path.exists(work_tree):
        repo = clone_repository(
            '{0}{1}.git'.format(GITHUB_URL, fork_from), work_tree)
    else:
        repo = Repository(work_tree)

    remote_name = repo_name.split('/')[0]
    update_remote(work_tree, repo, repo_name, remote_name)

    if remote_name == 'origin':
        commit = repo.revparse_single(from_sha)
        repo.checkout_tree(commit, strategy=GIT_CHECKOUT_FORCE)
    else:
        # PRs from forks are fetched under a synthetic pull ref.
        ref_name = 'refs/pull/{0}/head'.format(ticket_id)
        try:
            repo.create_reference(ref_name, from_sha)
        except ValueError:
            # Reference already exists from an earlier run.
            pass
        ref = repo.lookup_reference(ref_name)
        repo.checkout(ref, strategy=GIT_CHECKOUT_FORCE)
    cwd = os.getcwd()
    os.chdir(work_tree)
    try:
        # NOTE(review): shell=True with interpolated paths; FLAKE8_EXECUTABLE
        # and output_path are locally derived, but a list argv would be safer.
        subprocess.call(
            '{} . --output-file={}'.format(FLAKE8_EXECUTABLE, output_path),
            shell=True)
    finally:
        # BUGFIX: restore the working directory even if the call raises,
        # so one failed run cannot poison subsequent handlers.
        os.chdir(cwd)
    return output_path
Ejemplo n.º 6
0
def init_repo(repo_path, clone_from=None, clone_refs=False,
              alternate_repo_paths=None, is_bare=True):
    """Initialise a new git repository or clone from existing."""
    assert is_valid_new_path(repo_path)
    init_repository(repo_path, is_bare)

    if clone_from:
        # The origin's objects and refs are cloned into a subordinate tree
        # that is then registered as an alternate of the real repo.  This
        # lets git-receive-pack expose available commits as extra haves
        # without polluting refs in the real repo.
        subordinate = os.path.join(repo_path, 'turnip-subordinate')
        clone_repository(clone_from, subordinate, True)
        assert is_bare
        alternates_file = os.path.join(repo_path, 'objects/info/alternates')
        with open(alternates_file, 'w') as handle:
            handle.write('../turnip-subordinate/objects\n')

        if clone_refs:
            # Every object is reachable through the subordinate, so simply
            # copy all refs from the origin.  Unlike pygit2.clone_repository
            # this does not set up a remote.
            # TODO: Filter out internal (eg. MP) refs.
            source = Repository(clone_from)
            target = Repository(repo_path)
            for name in source.listall_references():
                target.create_reference(
                    name, source.lookup_reference(name).target)

    if alternate_repo_paths:
        write_alternates(repo_path, alternate_repo_paths)
    ensure_config(repo_path)  # set repository configuration defaults
    return repo_path
Ejemplo n.º 7
0
def get_current_branch(path_to_repository):
    """Utility method for getting the current branch from a Git repository.

        Args:
            path_to_repository (str): Path to the Git repository

        Returns:
            str: The current branch in the provided repository.
    """
    # Resolve the symbolic HEAD to the branch it points at and report its
    # short name.
    resolved_head = Repository(path_to_repository).lookup_reference("HEAD").resolve()
    return resolved_head.shorthand
Ejemplo n.º 8
0
class Git(object):
    r"""
    Interact with a git repository.
    """

    def __init__(self, gitdir):
        r"""
        Take a path to the git repository. Other methods interact with
        this git repository.
        """
        self.repo = Repository(gitdir)

    def branches(self):
        r"""
        Return the list of a branch name and its last commit id.
        """
        return self._refs('heads')

    def tags(self):
        r"""
        Return the list of a tag name and its last commit id.
        """
        return self._refs('tags')

    def _refs(self, ref_type):
        r"""
        Return a dict mapping short ref names under ``refs/<ref_type>/``
        ('heads' or 'tags') to their commit ids (hex).
        """
        # Parameter renamed from `type` so the builtin is not shadowed.
        refs = {}
        pattern = re.compile(r'refs/%s/(.*)$' % ref_type)
        for ref in self.repo.listall_references():
            m = pattern.match(ref)
            if m:
                reference = self.repo.lookup_reference(ref)
                refs[m.group(1)] = reference.hex
        return refs

    def create_branch(self, name, target):
        r"""
        Create new branch.

        :raises InvalidParamException: if name/target are invalid or the
            underlying reference creation fails.
        """
        if not is_valid_value(name):
            raise InvalidParamException("name is required")
        if not is_valid_hex(target):
            raise InvalidParamException("target is required")

        target = sha_hex2bin(target)
        try:
            self.repo.create_reference('refs/heads/%s' % name, target)
        # BUGFIX: `except Exception, e` is Python-2-only syntax; the `as`
        # form works on Python 2.6+ and Python 3.
        except Exception as e:
            raise InvalidParamException(str(e))

        return True
Ejemplo n.º 9
0
def shift(amount, repo_path):
    """Rewrite history so issue references in commit messages are offset.

    Walks the repository at *repo_path* oldest-first and, for every commit
    whose message matches ISSUE_RE (or whose parent was rewritten),
    creates a rewritten commit under a freshly named ref.

    Returns ``(changelog, reference)`` where *changelog* maps old commit
    oid -> rewritten oid and *reference* is the new ref's name.
    """
    repo = Repository(repo_path)
    head = repo.lookup_reference('HEAD').resolve()
    # Substitution callback; presumably adds `amount` to each matched
    # issue number -- TODO confirm against the module-level `add` helper.
    adder = partial(add, amount=amount)
    changelog = dict()  # old oid -> rewritten oid
    # Unique scratch ref name built from wall time and pid.
    reference = REF_FMT.format(time=time(), pid=getpid())
    # Reverse topological order guarantees parents are visited before
    # children, so rewritten parent oids are already in `changelog`.
    for commit in repo.walk(head.oid, GIT_SORT_REVERSE | GIT_SORT_TOPOLOGICAL):
        newmsg, nsubs = ISSUE_RE.subn(adder, commit.message)
        # Rewrite when the message changed, or when any parent was
        # rewritten (so the new history stays connected).
        if nsubs != 0 or any(pnt.oid in changelog for pnt in commit.parents):
            # Substitute rewritten parent oids where they exist.
            parents = [changelog.get(c.oid, c.oid) for c in commit.parents]
            new_oid = repo.create_commit(reference, commit.author,
                    commit.committer, newmsg, commit.tree.oid, parents)
            changelog[commit.oid] = new_oid
    return changelog, reference
Ejemplo n.º 10
0
def merge(dir, reference, msg):
    # Create a merge-style commit on HEAD with the tip of refs/heads/<reference>
    # as second parent, using message `msg`.
    # NOTE(review): the actual repo.merge() call below is commented out, so no
    # tree content is merged -- the commit records whatever is currently in the
    # index, merely wiring up two parents.  Confirm this is intended.
    repo = Repository(dir)
    reference = "refs/heads/" + reference
    print reference
    other_branch_ref = repo.lookup_reference(reference)
    other_branch_tip = other_branch_ref.target
    # repo.merge(other_branch_tip)
    print('merge complete!')
    user = repo.default_signature
    tree = repo.index.write_tree()
    message = msg
    new_commit = repo.create_commit('HEAD', user, user, message, tree,
                                    [repo.head.target, other_branch_tip])
    print(new_commit)
Ejemplo n.º 11
0
def git_is_clean(srcdir, project):
    """Check whether the project's checkout is pristine w.r.t. upstream.

    Returns a ``(is_clean, reason)`` tuple; *reason* is empty when clean.
    """
    repo = Repository(os.path.join(srcdir, project.workspace_path, ".git"))
    # Anything that is neither ignored nor current is uncommitted work.
    for _, flags in iteritems(repo.status()):
        if flags not in (GIT_STATUS_IGNORED, GIT_STATUS_CURRENT):
            return False, "has uncommitted changes"
    if repo.head_is_detached:
        return False, "has detached HEAD"
    origin = get_origin(repo, project)
    if not origin:
        return False, "has no upstream remote"
    remote_prefix = "refs/remotes/%s/" % origin.name
    remote_refs = []
    local_refs = {}
    for refname in repo.listall_references():
        if refname.startswith(remote_prefix):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                remote_refs.append(ref.target)
        elif not refname.startswith("refs/remotes/"):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                local_refs[ref.peel().id] = refname
    if not remote_refs:
        return False, "has no upstream remote branches"
    if not local_refs:
        return False, "has no local branches"
    upstream_master = "%s/%s" % (origin.name, project.master_branch)
    if not repo.lookup_branch(upstream_master, GIT_BRANCH_REMOTE):
        return False, "has no upstream master branch"
    # Discard every local ref reachable from a remote branch; whatever
    # survives points at commits that were never pushed.
    for remote_ref in remote_refs:
        for commit in repo.walk(remote_ref):
            local_refs.pop(commit.id, None)
    if local_refs:
        names = ["'%s'" % name for _, name in iteritems(local_refs)]
        return False, "has local commits: %s" % ", ".join(names)
    return True, ""
Ejemplo n.º 12
0
class Repo:
    """Serialises patch work on a single git checkout.

    A ``threading.Lock`` guarantees only one patch is being prepared at a
    time: ``lock_patch_work`` acquires it, ``unlock_patch_work`` releases
    it (always, via ``finally``).
    """

    def __init__(self, config):
        self.repo = Repository(config.repository_path)
        self.config = config
        self.lock = threading.Lock()
        # Start from a checked-out local master.
        master_ref = self.repo.lookup_reference("refs/heads/master")
        self.repo.checkout(master_ref)
        self.cred = MyRemoteCallback(config)

    def lock_patch_work(self, id):
        """Take the lock and check out a fresh branch *id* based on the
        patch origin's current master.  On failure the lock is released
        before the exception propagates; on success the caller holds the
        lock until ``unlock_patch_work``."""
        self.lock.acquire(True)
        try:
            #first lets update master
            self.repo.remotes[self.config.repository_patch_origin].fetch()
            #get the latest master
            master_ref = self.repo.branches.remote[
                self.config.repository_patch_origin + '/master']
            #In case the branch exists, delete it
            if id in self.repo.branches:
                self.repo.branches.delete(id)
            #create a new branch
            local = self.repo.branches.local.create(id, master_ref.peel())
            #finally switch over
            self.repo.checkout(local)
        except Exception as e:
            self.lock.release()
            raise e

    def unlock_patch_work(self, id):
        """Force-push branch *id* to the patch destination, switch back to
        the origin's master, and always release the lock."""
        try:
            self.repo.remotes[self.config.repository_patch_destination].push(
                ["+refs/heads/" + id], callbacks=self.cred)
            master_ref = self.repo.branches.remote[
                self.config.repository_patch_origin + '/master']
            self.repo.checkout(master_ref)
        finally:
            self.lock.release()

    def dispatch(self, id):
        """Delete the local branch for *id*."""
        #FIXME also delete on the remote
        self.repo.branches.delete(id)

    def fetch_commit_message(self, chash):
        """Return the commit message for the given commit id."""
        obj = self.repo.get(chash)
        return obj.message
Ejemplo n.º 13
0
    def _fast_forward(self, local_path, merge_target, branch):
        """Attempt to fast-forward *branch* in the repository at
        *local_path* to *merge_target* (a revspec naming the fetched head).

        Returns a ``(success, message)`` tuple.  Never creates merge
        commits: diverged branches are reported, not merged.
        """
        # fast-forward all the branches.
        # pygit2 repo
        repo = Repository(discover_repository(local_path))

        # convert merge_target from hex into oid.
        fetch_head = repo.revparse_single(merge_target)

        # try to resolve a common anscestor between fetched and local
        try:
            head = repo.revparse_single(branch)
        except KeyError:
            # Doesn't exist.  Create and done.
            repo.create_reference(branch, fetch_head.oid)
            return True, 'Created new branch: %s' % branch

        if head.oid == fetch_head.oid:
            return True, 'Source and target are identical.'

        # raises KeyError if no merge bases found.
        common_oid = repo.merge_base(head.oid, fetch_head.oid)

        # Three different outcomes between the remaining cases.
        if common_oid.hex not in (head.oid.hex, fetch_head.oid.hex):
            # common ancestor is beyond both of these, not going to
            # attempt a merge here and will assume this:
            return False, 'Branch will diverge.'
        elif common_oid.hex == fetch_head.oid.hex:
            # Remote is also the common ancestor, so nothing to do.
            return True, 'No new changes found.'

        # This case remains: common_oid.hex == head.oid.hex, meaning
        # this local repository is the ancestor of further changes
        # fetched from the remote - remote newer, so fast-forward.
        # NOTE(review): delete + create is two separate ref operations,
        # i.e. not atomic -- a crash in between loses the branch ref.
        ref = repo.lookup_reference(branch)
        ref.delete()

        repo.create_reference(branch, fetch_head.oid)

        return True, 'Fast-forwarded branch: %s' % branch
Ejemplo n.º 14
0
class Project1Test(unittest.TestCase):
    """Grading harness (Python 2) for a student web-server project.

    setUp clones the student's repository into the grader's temp dir,
    checks out the checkpoint tag, and picks random ports; the test_*
    methods then inspect submitted files, build with make, run the
    `lisod` server, and exercise it over HTTP (replays, HEAD/GET/POST,
    bandwidth).  tearDown kills the server and appends results/notes to
    the grader's output files.
    """

    def __init__(self, test_name, grader):
        super(Project1Test, self).__init__(test_name)
        self.grader = grader

    # setUpClass doesn't work!?
    def setUp(self):
        self.out_string = ""
        self.repo = REPO_TEMPLATE % self.grader.andrewid
        os.chdir(self.grader.tmp_dir)
        # Best-effort cleanup of any leftovers from a previous run.
        try:
            self.tearDown()
        except:
            pass
        self.git_clone(self.grader.root_repo)
        os.chdir(self.repo)
        self.repository = Repository('.git')
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        self.ran = False
        self.port = random.randint(1025, 9999)
        self.tls_port = random.randint(1025, 9999)
        print '\nUsing ports: %d,%d' % (self.port, self.tls_port)


    def pAssertEqual(self, arg1, arg2):
        # assertEqual wrapper that prints the failing call site first.
        try:
            self.assertEqual(arg1, arg2)
        except Exception as e:
            self.print_str(traceback.format_stack()[-2])
            raise e

    def pAssertTrue(self, test):
        # assertTrue wrapper that prints the failing call site first.
        try:
            self.assertTrue(test)
        except Exception as e:
            self.print_str(traceback.format_stack()[-2])
            raise e

    def print_str(self, prt_str):
        # Echo to stdout and accumulate for the results file (tearDown).
        print(prt_str)
        self.out_string += ("\n" + prt_str)

    def edit_notes(self, new_note):
        # Append `new_note` to the grader's notes file and open the editor.
        notef = self.grader.notes
        try:
            check_both('cat %s' % (notef), False)
            new_note = '\n'+new_note
        except:
            pass
        check_both('echo "%s\nGood." >> %s' % (new_note, notef), False)
        check_both('%s %s' % (self.grader.editor,notef))

    def confirm(self):
        # Interactive yes/no gate; empty answer counts as yes.
        print '-----------------------------------------------'
        test = raw_input('OK [y/n]? ').lower() in ['y','']
        self.pAssertTrue(test)

    def change_cgi(self, new_path):
        self.grader.cgi = new_path

    def liso_name(self):
        # NOTE(review): this rebinds self.liso_name from a method to a
        # string on first call; safe only because unittest builds a fresh
        # instance per test -- confirm before reusing instances.
        name = './lisod'
        # text = raw_input('liso name? ').strip()
        # if text: name = text
        self.liso_name = name
        return name

    def get_path(self):
        path = None
        # text = raw_input('path? ').strip()
        # if text: path = text
        return path

    def get_port(self):
        port = self.port
        # text = raw_input('port? ').strip()
        # if text: port = int(text)
        self.port = port
        print port
        return port

    def get_tls_port(self):
        tls_port = self.tls_port
        # text = raw_input('tls_port? ').strip()
        # if text: tls_port = int(text)
        self.tls_port = tls_port
        print tls_port
        return tls_port

    def find_path(self, name, tree, path='./', d=0):
        # Depth-limited search of the git tree for the directory that
        # contains an entry called `name`; returns the path or None.
        if d == 15: return None
        name = name.lower().strip()

        # bredth first...?
        for entry in tree:
            if entry.name.lower().strip() == name:
                return path

        # now check depth...?
        entries = [e for e in tree]
        for entry in reversed(entries):
            obj = self.repository[entry.oid]
            if isinstance(obj, Tree):
                obj = self.find_path(name, obj, os.path.join(path, entry.name), d+1)
                if obj:
                    return obj
        return None

    def find_file(self, name, tree, d=0):
        # Depth-limited search for a blob called `name`; returns the Blob
        # object or None.
        if d == 15: return None
        name = name.lower().strip()

        # bredth first...?
        for entry in tree:
            if entry.name.lower().strip() == name:
                resolved = self.repository[entry.oid]
                if not isinstance(resolved, Blob):
                    continue
                return resolved

        # now check depth...?
        entries = [e for e in tree]
        for entry in reversed(entries):
            obj = self.repository[entry.oid]
            if isinstance(obj, Tree):
                obj = self.find_file(name, obj, d+1)
                if obj:
                    return obj
        return None

    def run_lisod(self, tree):
        # Build the submission with make and launch lisod in the background.
        path = self.get_path()
        liso = self.liso_name()
        port = self.get_port()
        tls_port = self.get_tls_port()
        if not path: path = self.find_path('Makefile', tree)
        print 'switching to: %s' % path
        os.chdir(path)
        check_both('make clean', False, False)
        check_output('make')
        self.ran = True
        resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
        cmd = '%s %d %d %slisod.log %slisod.lock %s %s %s %s&' % (liso, port, tls_port, self.grader.tmp_dir, self.grader.tmp_dir, self.grader.www[:-1], self.grader.cgi, self.grader.priv_key, self.grader.cert)
        #cmd = 'nohup ' + cmd
        #cmd = cmd + " > /dev/null"
        print cmd
        self.pAssertEqual(0, os.system(cmd))
        return liso

    def git_clone(self, repourl):
        with open('/dev/null', 'w') as f:
            self.pAssertEqual(0, check_call(['git','clone', repourl], stderr=f,
                             stdout=f))

    def git_checkout(self, commit_hex):
        with open('/dev/null', 'w') as f:
            self.pAssertEqual(0,
                             check_call(['git','checkout','%s' % commit_hex],
                                        stdout=f, stderr=f))

    def resolve_tag(self):
        # Accept any of the three tag spellings students used, then peel
        # annotated tags down to the commit object.
        try:
            tag = self.repository.lookup_reference('refs/tags/checkpoint-%d' % self.grader.cp_num)
        except KeyError:
            try:
                tag = self.repository.lookup_reference('refs/tags/checkpoint_%d' % self.grader.cp_num)
            except KeyError:
                tag = self.repository.lookup_reference('refs/tags/checkpoint%d' % self.grader.cp_num)
        #tag = self.repository.lookup_reference('refs/tags/regrade')
        commit = self.repository[tag.target]
        while isinstance(commit, Tag): commit = self.repository[commit.target]
        return commit


    def check_headers(self, response_type, headers, length_content, ext):
        # Validate the required response headers for GET/HEAD/POST.
        self.pAssertEqual(headers['Server'].lower(), 'liso/1.0')

        try:
            datetime.datetime.strptime(headers['Date'], '%a, %d %b %Y %H:%M:%S %Z')
        except KeyError:
            self.print_str('Bad Date header')
        except:
            self.print_str('Bad Date header: %s' % (headers['Date']))

        self.pAssertEqual(int(headers['Content-Length']), length_content)
        #self.pAssertEqual(headers['Connection'].lower(), 'close')

        if response_type == 'GET' or response_type == 'HEAD':
            header_set = set(['connection', 'content-length',
                              'date', 'last-modified',
                              'server', 'content-type'])
            self.pAssertEqual(set(), header_set - set(headers.keys()))
            if headers['Content-Type'].lower() != MIME[ext]:
                self.print_str('MIME got %s expected %s' % (headers['Content-Type'].lower(), MIME[ext]))
            self.pAssertTrue(headers['Content-Type'].lower() == MIME[ext] or
                            headers['Content-Type'].lower() == MIME['.html'])

            try:
                datetime.datetime.strptime(headers['Last-Modified'], '%a, %d %b %Y %H:%M:%S %Z')
            except:
                self.print_str('Bad Last-Modified header: %s' % (headers['Last-Modified']))
        elif response_type == 'POST':
            header_set = set(['connection', 'content-length',
                              'date', 'server'])
            self.pAssertEqual(set(), header_set - set(headers.keys()))
        else:
            self.fail('Unsupported Response Type...')


    # test existence of tag in repo
    def test_tag_checkpoint(self):
        self.print_str('\n\n----- Testing Tag -----')
        self.repository.lookup_reference('refs/tags/checkpoint-%d' % self.grader.cp_num)

    # test turn in timestamp
    def test_timestamp(self):
        self.print_str('\n\n----- Testing Timestamp -----')
        commit = self.resolve_tag()
        self.print_str('ref/tags/checkpoint-%d: %s' % (self.grader.cp_num, commit.hex))
        self.print_str('Due: %s' % self.grader.due_date)
        utctime = datetime.datetime.utcfromtimestamp(commit.commit_time)
        utcoffset = datetime.timedelta(minutes=commit.commit_time_offset)
        timestamp = utctime + utcoffset
        self.print_str('Timestamp: %s' % timestamp)
        timediff = timestamp - self.grader.due_date
        if timediff.days >= 0 and\
           timediff.seconds > 0 or\
           timediff.microseconds > 0:
               raise ValueError

    # test readme.txt file up to snuff
    def test_readme_file(self):
        self.print_str('\n\n----- Testing readme.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- readme.txt -----'
        readme = self.find_file('readme.txt', tree)
        print readme.data,
        self.confirm()
        self.edit_notes('README:')

    # test vulnerabilities.txt up to snuff
    def test_vulnerabilities_file(self):
        self.print_str('\n\n----- Testing vulnerabilities.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- vulnerabilities.txt -----'
        vulnerable = self.find_file('vulnerabilities.txt', tree)
        print vulnerable.data,
        self.confirm()
        self.edit_notes('VULNERABILITIES:')

    # test tests.txt up to snuff
    def test_tests_file(self):
        self.print_str('\n\n----- Testing tests.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- tests.txt -----'
        tests = self.find_file('tests.txt', tree)
        print tests.data,
        self.confirm()
        self.edit_notes('TESTS:')

    # test Makefile up to snuff
    def test_Makefile_file(self):
        self.print_str('\n\n----- Testing Makefile file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- Makefile -----'
        Makefile = self.find_file('Makefile', tree)
        print Makefile.data,
        self.confirm()
        self.edit_notes('MAKEFILE:')

    # test if source up to snuff
    def test_inspect_source(self):
        self.print_str('\n\n----- Inspect Source cod *.[c|h] -----')
        self.print_str(self.grader.source_reminder)
        self.pAssertEqual(0, check_call(['bash']))
        self.confirm()
        self.edit_notes('SOURCE:')

    # tests if make properly creates lisod...
    def test_lisod_file(self):
        self.print_str('\n\n----- Testing make -----')
        commit = self.resolve_tag()
        path = self.get_path()
        if not path: path = self.find_path('Makefile', commit.tree)
        os.chdir(path)
        check_output('make')
        self.pAssertTrue(os.path.exists('./lisod'))

    # send all test files to their server
    # get output, give 3 second timeout
    # check sha's of output
    def test_replays(self):
        self.print_str('\n\n----- Testing Replays -----')
        commit = self.resolve_tag()
        self.run_lisod(commit.tree)
        time.sleep(3)
        replays_dir = os.path.join(self.grader.tmp_dir, 'replays')
        if not os.path.exists(replays_dir):
            os.makedirs(replays_dir)
        files = os.listdir(replays_dir)
        num_passed = 0
        num_files = 0
        for fname in files:
            basename, extension = os.path.splitext(fname)
            if extension == '.test':
                num_files += 1
                self.print_str('testing %s...' % fname)
                fname = os.path.join(self.grader.tmp_dir + 'replays', fname)
                outfile = os.path.join(self.grader.tmp_dir + 'replays', '%s_%s.out' % (basename, self.repo))
                command = 'ncat -i 1s localhost %d < %s > %s' % (self.port, fname, outfile)

                check_both(command, False, False)
                with open(os.path.join(self.grader.tmp_dir + 'replays', basename+'.out')) as f:
                    with open(outfile) as f2:
                        outhash = hashlib.sha256(f.read()).hexdigest()
                        out2hash = hashlib.sha256(f2.read()).hexdigest()
                        if outhash == out2hash:
                            self.print_str('ok')
                            num_passed += 1
                        else:
                            self.print_str('failed')
                check_both('rm %s' % outfile)
        self.print_str('passed %d of %d' % (num_passed, num_files))
        self.pAssertEqual(num_passed,num_files)

    def test_HEAD_headers(self):
        self.print_str('----- Testing Headers -----')
        tests = {
            'http://127.0.0.1:%d/index.html' :
            ('f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd', 802),
            'http://127.0.0.1:%d/images/liso_header.png' :
            ('abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786', 17431),
            'http://127.0.0.1:%d/style.css' :
            ('575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a', 301)
                }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root,ext = os.path.splitext(test)
            response = requests.head(test % self.port, timeout=10.0)
            self.check_headers(response.request.method,
                               response.headers,
                               tests[test][1],
                               ext)

    def test_HEAD(self):
        self.print_str('----- Testing HEAD -----')
        tests = {
            'http://127.0.0.1:%d/index.html' :
            ('f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd', 802),
            'http://127.0.0.1:%d/images/liso_header.png' :
            ('abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786', 17431),
            'http://127.0.0.1:%d/style.css' :
            ('575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a', 301)
                }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root,ext = os.path.splitext(test)
            response = requests.head(test % self.port, timeout=10.0)
            contenthash = hashlib.sha256(response.content).hexdigest()
            self.pAssertEqual(200, response.status_code)

    def test_GET(self):
        self.print_str('----- Testing GET -----')
        tests = {
            'http://127.0.0.1:%d/index.html' :
            'f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
            'http://127.0.0.1:%d/images/liso_header.png' :
            'abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786',
            'http://127.0.0.1:%d/style.css' :
            '575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a'
                }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root,ext = os.path.splitext(test)
            response = requests.get(test % self.port, timeout=10.0)
            contenthash = hashlib.sha256(response.content).hexdigest()
            self.pAssertEqual(200, response.status_code)
            self.pAssertEqual(contenthash, tests[test])

    def test_POST(self):
        self.print_str('----- Testing POST -----')
        tests = {
            'http://127.0.0.1:%d/index.html' :
            'f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
                }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root,ext = os.path.splitext(test)
            # for checkpoint 2, this should time out; we told them to swallow the data and ignore
            try:
                response = requests.post(test % self.port, data='dummy data', timeout=3.0)
            #except requests.exceptions.Timeout:
            except requests.exceptions.RequestException:
                print 'timeout'
                continue
            except socket.timeout:
                print 'socket.timeout'
                continue

            # if they do return something, make sure it's OK
            self.pAssertEqual(200, response.status_code)


    def test_bw(self):
        print '(----- Testing BW -----'
        check_output('echo "----- Testing BW ----" >> %s' % self.grader.results)
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        self.pAssertEqual(0, os.system('curl -m 10 -o /dev/null http://127.0.0.1:%d/big.html 2>> %s' % (self.port, self.grader.results)))


    def tearDown(self):
        # Remove the lock file and clone, record pass/fail, kill the server.
        #check_both('rm ' + self.grader.tmp_dir + 'lisod.log', False, False)
        check_both('rm ' + self.grader.tmp_dir + 'lisod.lock', False, False)
        os.chdir(self.grader.tmp_dir)
        shutil.rmtree(self.repo)
        if sys.exc_info() == (None, None, None): #test succeeded
            self.out_string += '\nok'
        else:
            self.out_string += '\nfailed'
        if self.out_string:
            check_both('echo "%s" >> %s' % (self.out_string, self.grader.results))
        if self.ran:
            print 'trying "killall -9 %s"' % os.path.basename(self.liso_name)
            check_both('killall -9 %s' % os.path.basename(self.liso_name), True, False)
Ejemplo n.º 15
0
class CollectGit(object):
    """
    Small Helper class for small repositories.
    This does not scale because we hold a lot of data in memory.

    Wraps a pygit2 Repository and caches hunks, file actions and a commit
    graph (networkx DiGraph) used by the blame-based analyses below.
    """

    # Matches C/Java-style comments (// ... and /* ... */) while trying to
    # skip occurrences that sit inside string literals.
    _regex_comment = re.compile(
        r"(//[^\"\n\r]*(?:\"[^\"\n\r]*\"[^\"\n\r]*)*[\r\n]|/\*([^*]|\*(?!/))*?\*/)(?=[^\"]*(?:\"[^\"]*\"[^\"]*)*$)"
    )
    # Matches added/removed diff lines that look like javadoc comment lines.
    _regex_jdoc_line = re.compile(r"(- |\+)\s*(\*|/\*).*")

    def __init__(self, path):
        # normalise the path so it always points at the .git directory
        if not path.endswith('.git'):
            if not path.endswith('/'):
                path += '/'
            path += '.git'
        self._log = logging.getLogger(self.__class__.__name__)
        self._path = path
        self._repo = Repository(self._path)
        self._hunks = {}  # commit hex -> list of hunk dicts

        self._file_actions = {}  # commit hex -> changed-file records
        self._bugfix = {}
        self._msgs = {}
        self._days = {}
        self._cdays = {}
        self._branches = {}
        self._tags = {}

        # diff options: follow renames and copies during find_similar()
        self._dopts = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES
        self._SIMILARITY_THRESHOLD = 50
        self._graph = nx.DiGraph()  # commit DAG built by collect()

    @classmethod
    def clone_repo(cls, uri, local_path):
        """Clone *uri* below *local_path*, or fetch if it is already cloned.

        :param str uri: remote git URI; the project name is derived from it.
        :param str local_path: directory under which the clone is placed.
        :returns: path of the local repository (with trailing slash).
        :raises Exception: if the underlying git command fails.
        """
        project_name = uri.split('/')[-1].split('.git')[0]
        repo_path = local_path + '/' + project_name + '/'

        if os.path.isdir(repo_path):
            # repository already present: just refresh it
            proc = subprocess.run(['git', 'fetch'],
                                  cwd=repo_path,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            if proc.returncode != 0:
                err = 'Error pulling repository {} to {}'.format(
                    uri, repo_path)
                raise Exception(err)
            return repo_path

        # fresh clone into a newly created directory
        os.mkdir(repo_path)
        proc = subprocess.run(['git', 'clone', uri, repo_path],
                              cwd=repo_path,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        if proc.returncode != 0:
            err = 'Error cloning repository {} to {}'.format(
                uri, repo_path)
            raise Exception(err)
        return repo_path

    def _changed_lines(self, hunk):
        added_lines = []
        deleted_lines = []

        del_line = hunk['old_start']
        add_line = hunk['new_start']

        for line in hunk['content'].split('\n'):

            tmp = line[1:].strip()
            # is_comment = tmp.startswith('//') or tmp.startswith('/*') or tmp.startswith('*')

            if line.startswith('+'):
                added_lines.append((add_line, tmp))
                del_line -= 1
            if line.startswith('-'):
                deleted_lines.append((del_line, tmp))
                add_line -= 1

            del_line += 1
            add_line += 1

        return added_lines, deleted_lines

    def _comment_only_change(self, content):
        content = content + '\n'  # required for regex to drop comments
        content = re.sub(self._regex_comment, "", content)
        removed = ''
        added = ''
        for line in content.split('\n'):
            line = re.sub(
                r"\s+", " ", line, flags=re.UNICODE
            )  # replace all kinds of whitespaces (also multiple) with sińgle whitespace
            if not re.match(self._regex_jdoc_line, line):
                if line.startswith('-'):
                    removed += line[1:].strip()
                elif line.startswith('+'):
                    added += line[1:].strip()
        return removed == added

    def _blame_lines(self,
                     revision_hash,
                     filepath,
                     strategy,
                     ignore_lines=False,
                     validated_bugfix_lines=False):
        """We want to find changed lines for one file in one commit (from the previous commit).

        For this we are iterating over the diff and counting the lines that are deleted (changed) from the original file.
        We ignore all added lines.

        ignore_lines is already specific to all changed hunks of the file for which blame_lines is called

        :param str revision_hash: commit whose hunks are inspected.
        :param str filepath: only hunks of this file are considered.
        :param str strategy: 'code_only' drops comment/whitespace-only changes.
        :param ignore_lines: False or iterable of (start, end) ranges to skip.
        :param validated_bugfix_lines: False or a whitelist of line numbers.
        :returns: list of (line_number, text) tuples of deleted lines.
        """
        # (re)populate the hunk cache for this commit
        c = self._repo.revparse_single('{}'.format(revision_hash))
        self._hunks[revision_hash] = self._get_hunks(c)

        changed_lines = []
        if revision_hash not in self._hunks.keys(
        ) or not self._hunks[revision_hash]:
            return changed_lines

        for h in self._hunks[revision_hash]:
            if h['new_file'] != filepath:
                continue

            # only whitespace or comment changes in the hunk, ignore
            if strategy == 'code_only' and self._comment_only_change(
                    h['content']):
                self._log.debug(
                    'detected whitepace or comment only change in {} for {}'.
                    format(revision_hash, filepath))
                continue

            added, deleted = self._changed_lines(h)
            for dt in deleted:
                # skip duplicates and empty deleted lines
                if dt not in changed_lines and dt[1]:
                    # comment lines are not code
                    if strategy == 'code_only' and dt[1].startswith(
                        ('//', '/*', '*')):
                        continue

                    # we may only want validated lines
                    if validated_bugfix_lines is not False:
                        if dt[0] not in validated_bugfix_lines:
                            continue

                    # we may ignore lines, e.g., refactorings
                    if ignore_lines:
                        ignore = False
                        for start_line, end_line in ignore_lines:
                            if start_line <= dt[0] <= end_line:
                                ignore = True
                                break

                        # if we hit the line in our ignore list we continue to the next
                        if ignore:
                            # self._log.warn('ignore line {} in file {} in commit {} because of refactoring detection'.format(dt[0], filepath, revision_hash))
                            continue

                    changed_lines.append(dt)

        return changed_lines

    def blame(self,
              revision_hash,
              filepath,
              strategy='code_only',
              ignore_lines=False,
              validated_bugfix_lines=False):
        """Collect a list of commits where the given revision and file were last changed.

        Uses git blame.

        :param str revision_hash: Commit for which we want to collect blame commits.
        :param str filepath: File for which we want to collect blame commits.
        :param str strategy: 'code_only' skips comment/whitespace-only changes.
        :param ignore_lines: False or iterable of (start, end) line ranges to skip.
        :param validated_bugfix_lines: False or a whitelist of line numbers.
        :rtype: list
        :returns: A list of tuples of blame commits and the original file for the given parameters.
        """
        commits = []

        # ignore commits that are not part of the collected graph
        if revision_hash not in self._graph:
            return []

        # bail on merge commits: blaming against multiple parents is ambiguous
        parents = list(self._graph.predecessors(revision_hash))
        if len(parents) > 1:
            self._log.debug(
                'skipping blame on revision: {} because it is a merge commit'.
                format(revision_hash))
            return []

        changed_lines = self._blame_lines(revision_hash, filepath, strategy,
                                          ignore_lines, validated_bugfix_lines)
        parent_commit = self._repo.revparse_single('{}^'.format(revision_hash))

        blame = self._repo.blame(filepath,
                                 flags=GIT_BLAME_TRACK_COPIES_SAME_FILE,
                                 newest_commit=parent_commit.hex)
        for lineno, line in changed_lines:
            # returns blamehunk for specific line
            try:
                bh = blame.for_line(lineno)
            except IndexError:
                # this happens when we have the wrong parent node
                # BUGFIX: `bh` is unbound when for_line() raises, so it must
                # not appear in the log message (the previous code referenced
                # it and masked the real error with a NameError).
                self._log.error(
                    'tried to get file: {}, line: {}, revision: {}'.format(
                        filepath, lineno, revision_hash))
                raise  # this is critical

            inducing_commit = self._repo.revparse_single(str(
                bh.orig_commit_id))
            commits.append((inducing_commit.hex, bh.orig_path))

        # make unique
        return list(set(commits))

    def commit_information(self, revision_hash):
        obj = self._repo.get(revision_hash)

        return {
            'author_name':
            obj.author.name,
            'author_email':
            obj.author.email,
            'committer_name':
            obj.committer.name,
            'committer_email':
            obj.committer.email,
            'committer_date_utc':
            datetime.fromtimestamp(obj.commit_time, tz=timezone.utc),
            'committer_date':
            obj.commit_time,
            'committer_date_offset':
            obj.commit_time_offset,
            'message':
            obj.message,
            'file_actions':
            self._file_actions[revision_hash]
        }

    def file_actions(self, revision_hash):
        return self._file_actions[revision_hash]

    def all_files(self, revision_hash):
        """Check out *revision_hash* and return every file in the working tree."""
        # 1. checkout repo
        self._checkout_revision(revision_hash)

        # 2. list files
        return self._list_files()

    def first_occurence(self, filename):
        """Return the datetime of the first commit that added *filename*.

        Shells out to the git CLI (log --follow --diff-filter=A) because
        rename tracking is not available through libgit2.

        :raises Exception: if the git command fails.
        """
        # file rename tracking is not possible currently in libgit, see:
        # https://github.com/libgit2/libgit2/issues/3041

        # find first occurence of file with git cli

        # git log --follow --diff-filter=A --find-renames=40% foo.js
        path = self._path.replace('.git', '')
        c = subprocess.run([
            'git', 'log', '--all', '--pretty=tformat:"%H %ci"', '--follow',
            '--diff-filter=A', '--find-renames=80%', '--', filename
        ],
                           cwd=path,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
        if c.returncode != 0:
            err = 'Error finding first occurrence of file: {}'.format(filename)
            self._log.error(err)
            self._log.error(c.stderr)
            raise Exception(err)

        full = c.stdout.decode('utf-8')
        try:
            # git log prints newest first: the last non-empty line is the add
            first_line = full.split('\n')[-2]
        except IndexError:
            if not full:
                print('no git log for file {}'.format(filename))
            print(full)
            raise
        # drop the leading hash, keep the quoted committer date
        first_date = ' '.join(first_line.split(' ')[1:]).replace('"', '')
        dt = datetime.strptime(
            first_date, '%Y-%m-%d %H:%M:%S %z'
        )  # we can do this here because we control the input format, %z does not cover +01:00 just +100 (at least in 3.6)
        return dt

    def tags(self):
        """Return a list of {'name', 'revision_hash'} dicts for all tags."""
        tag_pattern = re.compile('^refs/tags')
        result = []
        for refname in self._repo.listall_references():
            if not tag_pattern.match(refname):
                continue
            ref = self._repo.lookup_reference(refname)
            result.append({
                'name': ref.name.replace('refs/tags/', ''),
                'revision_hash': ref.peel().id
            })
        return result

    def _checkout_revision(self, revision):
        """Checkout via shell, we ignore stdout output.

        :returns: True when the git checkout succeeded.
        """
        workdir = self._path.replace('.git', '')
        proc = subprocess.run(['git', 'checkout', '-q', '-f', revision],
                              cwd=workdir,
                              stdout=subprocess.PIPE)
        return proc.returncode == 0

    def _list_files(self):
        """The slower list_files"""
        path = self._path.replace('.git', '')

        ret = []
        for root, dirs, files in os.walk(path):
            for file in files:
                filepath = os.path.join(root, file)
                relative_filepath = filepath.replace(path, '')
                ret.append(relative_filepath)
        return ret

    def _list_files2(self):
        """The faster list_files (relies on find command)"""
        path = self._path.replace('.git', '')
        lines = subprocess.check_output(['find', '.', '-iname', '*.java'],
                                        cwd=path)

        files = []
        for f in lines.decode('utf-8').split('\n'):
            if f.lower().endswith('.java'):
                files.append(f.replace('./', ''))

        return files

    def _get_hunks(self, commit):
        diffs = []
        hunks = []

        # for initial commit (or orphan commits) pygit2 needs some special attention
        initial = False
        if not commit.parents:
            initial = True
            diffs.append((None,
                          commit.tree.diff_to_tree(context_lines=0,
                                                   interhunk_lines=1)))

        # we may have multiple parents (merge commit)
        for parent in commit.parents:
            # we need all information from each parent because in a merge each parent may add different files
            tmp = self._repo.diff(parent,
                                  commit,
                                  context_lines=0,
                                  interhunk_lines=1)
            tmp.find_similar(self._dopts, self._SIMILARITY_THRESHOLD,
                             self._SIMILARITY_THRESHOLD)
            diffs.append((parent.hex, tmp))

        for parent, diff in diffs:
            checked_paths = set()
            for patch in diff:
                if patch.delta.new_file.path in checked_paths:
                    self._log.warn('already have {} in checked_paths'.format(
                        patch.delta.new_file.path))
                    continue
                mode = 'X'
                if patch.delta.status == 1:
                    mode = 'A'
                elif patch.delta.status == 2:
                    mode = 'D'
                elif patch.delta.status == 3:
                    mode = 'M'
                elif patch.delta.status == 4:
                    mode = 'R'
                elif patch.delta.status == 5:
                    mode = 'C'
                elif patch.delta.status == 6:
                    mode = 'I'
                elif patch.delta.status == 7:
                    mode = 'U'
                elif patch.delta.status == 8:
                    mode = 'T'

                # diff to tree gives D for inital commit otherwise
                if initial:
                    mode = 'A'

                # we may have hunks to add
                if patch.hunks and commit.hex not in self._hunks.keys():
                    self._hunks[commit.hex] = []

                # add hunks
                for hunk in patch.hunks:
                    # initial is special case
                    if initial:
                        content = ''.join(
                            ['+' + l.content for l in hunk.lines])
                        hunks.append({
                            'header': hunk.header,
                            'new_file': patch.delta.new_file.path,
                            'new_start': hunk.old_start,
                            'new_lines': hunk.old_lines,
                            'old_start': hunk.new_start,
                            'old_lines': hunk.new_lines,
                            'content': content
                        })
                    else:
                        content = ''.join(
                            [l.origin + l.content for l in hunk.lines])
                        hunks.append({
                            'header': hunk.header,
                            'new_file': patch.delta.new_file.path,
                            'new_start': hunk.new_start,
                            'new_lines': hunk.new_lines,
                            'old_start': hunk.old_start,
                            'old_lines': hunk.old_lines,
                            'content': content
                        })
        return hunks

    def _changed_files(self, commit):
        changed_files = []
        diffs = []

        # for initial commit (or orphan commits) pygit2 needs some special attention
        initial = False
        if not commit.parents:
            initial = True
            diffs.append((None,
                          commit.tree.diff_to_tree(context_lines=0,
                                                   interhunk_lines=1)))

        # we may have multiple parents (merge commit)
        for parent in commit.parents:
            # we need all information from each parent because in a merge each parent may add different files
            tmp = self._repo.diff(parent,
                                  commit,
                                  context_lines=0,
                                  interhunk_lines=1)
            tmp.find_similar(self._dopts, self._SIMILARITY_THRESHOLD,
                             self._SIMILARITY_THRESHOLD)
            diffs.append((parent.hex, tmp))

        for parent, diff in diffs:
            checked_paths = set()
            for patch in diff:
                if patch.delta.new_file.path in checked_paths:
                    self._log.warn('already have {} in checked_paths'.format(
                        patch.delta.new_file.path))
                    continue
                mode = 'X'
                if patch.delta.status == 1:
                    mode = 'A'
                elif patch.delta.status == 2:
                    mode = 'D'
                elif patch.delta.status == 3:
                    mode = 'M'
                elif patch.delta.status == 4:
                    mode = 'R'
                elif patch.delta.status == 5:
                    mode = 'C'
                elif patch.delta.status == 6:
                    mode = 'I'
                elif patch.delta.status == 7:
                    mode = 'U'
                elif patch.delta.status == 8:
                    mode = 'T'

                # diff to tree gives D for inital commit otherwise
                if initial:
                    mode = 'A'

                # we may have hunks to add
                if patch.hunks and commit.hex not in self._hunks.keys():
                    self._hunks[commit.hex] = []

                # add hunks
                for hunk in patch.hunks:
                    # initial is special case
                    if initial:
                        content = ''.join(
                            ['+' + l.content for l in hunk.lines])
                        self._hunks[commit.hex].append({
                            'header':
                            hunk.header,
                            'new_file':
                            patch.delta.new_file.path,
                            'new_start':
                            hunk.old_start,
                            'new_lines':
                            hunk.old_lines,
                            'old_start':
                            hunk.new_start,
                            'old_lines':
                            hunk.new_lines,
                            'content':
                            content
                        })
                    else:
                        content = ''.join(
                            [l.origin + l.content for l in hunk.lines])
                        self._hunks[commit.hex].append({
                            'header':
                            hunk.header,
                            'new_file':
                            patch.delta.new_file.path,
                            'new_start':
                            hunk.new_start,
                            'new_lines':
                            hunk.new_lines,
                            'old_start':
                            hunk.old_start,
                            'old_lines':
                            hunk.old_lines,
                            'content':
                            content
                        })

                # collect line stats
                if initial:
                    fa = {
                        'lines_added': patch.line_stats[2],
                        'lines_deleted': patch.line_stats[1],
                        'changeset_size': len(diff),
                        'parent': None
                    }
                else:
                    fa = {
                        'lines_added': patch.line_stats[1],
                        'lines_deleted': patch.line_stats[2],
                        'changeset_size': len(diff),
                        'parent': parent
                    }

                #if mode == 'R':
                #    print('R {} -> {}, sim: {}'.format(patch.delta.old_file.path, patch.delta.new_file.path, patch.delta.similarity))

                if mode in ['C', 'R']:
                    changed_file = [
                        mode, patch.delta.new_file.path,
                        patch.delta.old_file.path, fa
                    ]
                else:
                    changed_file = [mode, patch.delta.new_file.path, None, fa]

                checked_paths.add(patch.delta.new_file.path)
                changed_files.append(changed_file)
        return changed_files

    def collect(self):
        """Build and return the commit graph over all branches and tags."""
        # list all branches
        for branch in list(self._repo.branches):
            self._collect_branch(branch)

        # list all tags
        # iterate every object in the repository and pick out tag objects
        for obj in self._repo:
            tag = self._repo[obj]
            if tag.type == GIT_OBJ_TAG:
                self._collect_branch(tag, is_tag=True)

        return self._graph

    def _collect_branch(self, branch, is_tag=False):
        """Add all commits reachable from *branch* to the commit graph.

        :param branch: branch name (str), Branch object or Tag object; a
            string is resolved through self._repo.branches first.
        :param bool is_tag: kept for API compatibility; currently unused.
        """
        # BUGFIX/idiom: use isinstance instead of type(...) == str
        if isinstance(branch, str):
            branch = self._repo.branches[branch]

        try:
            # first pass: add every reachable commit as a node
            for c in self._repo.walk(branch.target):
                self._graph.add_node(c.hex)

            # second pass: add parent -> child edges
            for c in self._repo.walk(branch.target):
                for p in c.parents:
                    self._graph.add_edge(p.hex, c.hex)
        except ValueError:
            # walk() raises ValueError for invalid/unborn targets; skip them
            pass
Ejemplo n.º 16
0
class ChangeCtxDefault(ChangeCtx):
    """Class with the specific implementation details for the change context
    of the default revision state of the repository. It inherits the common
    implementation from the class :class:`ChangeCtxBase`.
    """

    def __init__(self, repo_path):
        self._repo_path = repo_path
        self._repo = Repository(self._repo_path)
        # context object of the cached master revision
        self._ctx = self._repo[self.revision_id]

    @locked_cached_property
    def files(self):
        """Return the sorted list of all file paths in the revision tree."""
        def r(_files, repo, tree, prefix=None):
            # depth-first walk over the git tree, accumulating blob paths
            for entry in tree:
                obj = repo[entry.oid]
                filename = prefix and (prefix + '/' + entry.name) or entry.name
                if obj.type == GIT_OBJ_TREE:
                    r(_files, repo, obj, filename)
                elif obj.type == GIT_OBJ_BLOB:
                    _files.append(filename)
                else:
                    raise RuntimeError('Invalid object: %s' % filename)
        f = []
        r(f, self._repo, self._ctx.tree)
        return sorted(f)

    @locked_cached_property
    def revision_id(self):
        """This property should be cached because the lookup_reference method
        reloads itself.
        """
        try:
            ref = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            raise RuntimeError('Branch "master" not found!')
        return ref.target

    def needs_reload(self):
        """Return True when master has moved past the cached revision."""
        try:
            ref = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            return True
        return self.revision_id != ref.target

    def filectx_needs_reload(self, filectx):
        """Return True when *filectx* was built from an oid other than the
        current master target."""
        try:
            ref = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            raise RuntimeError('Branch "master" not found!')
        return filectx._changectx.oid != ref.target

    def published(self, date, now):
        """A post is published once its date is not in the future."""
        return date <= now

    def etag(self, filectx):
        """Build an ETag from mtime/date, content length and an Adler-32
        checksum of the file path."""
        return 'blohg-%i-%i-%s' % (filectx.mdate or filectx.date,
                                   len(filectx.data),
                                   adler32(filectx.path.encode('utf-8'))
                                   & 0xffffffff)

    def get_filectx(self, path):
        """Return a FileCtx for *path* at the current revision."""
        return FileCtx(self._repo, self._ctx, path)
Ejemplo n.º 17
0
def get_folders(projectCode, branch, requested_path):
    """Return a JSON listing of all files below *requested_path* of a project.

    :param projectCode: project identifier (repository folder name)
    :param branch: requested branch (NOTE: currently ignored -- 'master' is
        always checked out, see TODO below)
    :param requested_path: folder path, e.g. request.args.get('folderPath')

    **Response:**
    ```
    {
        "list": [
            {
                "name": "myfile.md",
                "full_path": "/folder/myfile.md",
                "parent": "/folder/"
            }
        ],
        "_meta": {
            "per-page": 12,
            "page": 12,
            "total-pages": 12
        }
    }
    ```
    """
    # Get path
    requested_path = "/" + requested_path

    # Set folder
    folder = config["REPO_FOLDER"] + projectCode
    if not os.path.isdir(folder):
        # TODO: throw exception
        return json.dumps({"error": 404, "description": "Project not found"})

    # Checkout branch
    # TODO: honour the *branch* argument and fail cleanly if it does not
    # exist; currently 'master' is hard-coded.
    repo = Repository(folder)
    branch = repo.lookup_branch('master')
    ref = repo.lookup_reference(branch.name)
    repo.checkout(ref)

    # Get files in path.
    # NOTE(review): os.walk also descends into '.git' -- presumably
    # unintended; verify before changing the behaviour.
    file_list = []  # renamed from 'list' to avoid shadowing the builtin
    for root, dirs, files in os.walk(folder + requested_path):
        for filename in files:
            if root == folder + "/":
                file_list.append({
                    "name": filename,
                    "full_path": "/" + filename,
                    "parent": "/"
                })
            else:
                file_list.append({
                    "name": filename,
                    "full_path": root[len(folder):] + "/" + filename,
                    "parent": root[len(folder):] + "/"
                })

    response = {
        "list": file_list,
        "_meta": {
            "per-page": 99999,  # TODO: make pagination?
            "page": 1,
            "total-pages": 1
        }
    }
    return json.dumps(response)
Ejemplo n.º 18
0
 def _checkout_commit(self, repo: pygit2.Repository, commit):
     """Check out *commit* through a temporary working-tag reference."""
     repo.create_reference(DataExtractor.WORKING_TAG_REFNAME, commit.id)
     repo.checkout(DataExtractor.WORKING_TAG_REFNAME)
     # drop the temporary reference again so no stray tag is left behind
     temp_ref = repo.lookup_reference(DataExtractor.WORKING_TAG_REFNAME)
     temp_ref.delete()
Ejemplo n.º 19
0
class PyGitEngine(GitContentDatabaseEngine):
    """Content database backed by a bare git repository (via pygit2)."""

    def __init__(self, config):
        super(PyGitEngine, self).__init__(config)
        self.repo = None  # set by connect()

    def connect(self):
        """Create content directory"""
        if not isdir(self.content_path):
            # first use: initialise a bare repository with an empty commit
            init_repository(self.content_path, bare=True)
            self.repo = Repository(self.content_path)
            self.create_initial_commit()
        else:
            self.repo = Repository(self.content_path)

    @staticmethod
    def do_put(content_path, object_hashes, content, filename):
        """Perform put operation. This is used in the distributed wrapper"""
        # store the content as a blob and remember its hash under *filename*
        content_hash = Repository(content_path).create_blob(content)
        result = object_hashes[filename] = str(content_hash)
        return result

    def put_attr(self, content, filename):
        """Return attributes for the do_put operation"""
        filename = self._inc_name(filename)
        return (self.content_path, self.object_hashes, content, filename)

    def put(self, content, filename="generic"):  # pylint: disable=method-hidden
        """Put content in the content database"""
        return self.do_put(*self.put_attr(content, filename))

    def get(self, content_hash):  # pylint: disable=method-hidden
        """Get content from the content database"""
        return_data = self.repo[content_hash].data
        return return_data

    def find_subhash(self, content_hash):
        """Find hash in git"""
        try:
            # revparse_single also resolves abbreviated hashes
            blob = self.repo.revparse_single(content_hash)
            return str(blob.id)
        except KeyError:
            return None

    def create_initial_commit(self):
        """Create the initial commit of the git repository"""
        empty_tree = self.repo.TreeBuilder().write()
        self.create_commit_object(self._initial_message, empty_tree)

    def create_commit_object(self, message, tree):
        """Create a commit object"""
        references = list(self.repo.references)

        # only look up the commit ref once any reference exists at all
        master_ref = self.repo.lookup_reference(
            self._commit_ref) if len(references) > 0 else None

        parents = []
        if master_ref is not None:
            parents = [master_ref.peel().id]

        author = Signature(self._commit_name, self._commit_email)
        return self.repo.create_commit(self._commit_ref, author, author,
                                       message, tree, parents)

    def new_tree(self, parent):
        """Create new git tree"""
        # NOTE(review): *parent* is currently unused -- confirm whether a
        # child tree relative to it was intended here.
        return self.repo.TreeBuilder()

    def insert_blob(self, tree, basename, value):
        """Insert blob into tree"""
        tree.insert(basename, value, GIT_FILEMODE_BLOB)

    def insert_tree(self, tree, basename, value):
        """Insert tree into tree"""
        tree.insert(basename, value, GIT_FILEMODE_TREE)

    def write_tree(self, tree):
        """Write tree to git directory"""
        return tree.write()
Ejemplo n.º 20
0
class Project1Test(unittest.TestCase):
    """Grading harness for a student "lisod" HTTP server project.

    Each test clones the student repository, checks out the commit tagged
    for the current checkpoint, builds the server, and exercises it over
    HTTP on randomly chosen ports.

    Python 2 code (print statements, raw_input) — do not modernize blindly.
    """

    def __init__(self, test_name, grader):
        super(Project1Test, self).__init__(test_name)
        # *grader* carries configuration: andrewid, tmp_dir, repo URL,
        # checkpoint number, due date, editor, result/notes paths, etc.
        self.grader = grader

    # setUpClass doesn't work!?
    def setUp(self):
        # Fresh clone + checkout of the checkpoint commit before every test.
        self.out_string = ""
        self.repo = REPO_TEMPLATE % self.grader.andrewid
        os.chdir(self.grader.tmp_dir)
        try:
            # Best-effort cleanup of leftovers from a previous (failed) run.
            self.tearDown()
        except:
            pass
        self.git_clone(self.grader.root_repo)
        os.chdir(self.repo)
        self.repository = Repository('.git')
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        self.ran = False
        # Random high ports to avoid collisions between consecutive runs.
        self.port = random.randint(1025, 9999)
        self.tls_port = random.randint(1025, 9999)
        print '\nUsing ports: %d,%d' % (self.port, self.tls_port)

    def pAssertEqual(self, arg1, arg2):
        # assertEqual that also records the failing call site in the report.
        try:
            self.assertEqual(arg1, arg2)
        except Exception as e:
            self.print_str(traceback.format_stack()[-2])
            raise e

    def pAssertTrue(self, test):
        # assertTrue that also records the failing call site in the report.
        try:
            self.assertTrue(test)
        except Exception as e:
            self.print_str(traceback.format_stack()[-2])
            raise e

    def print_str(self, prt_str):
        # Print and accumulate into the per-test report (flushed in tearDown).
        print(prt_str)
        self.out_string += ("\n" + prt_str)

    def edit_notes(self, new_note):
        # Append *new_note* to the notes file and open it in the grader's editor.
        notef = self.grader.notes
        try:
            # If the file already has content, separate with a blank line.
            check_both('cat %s' % (notef), False)
            new_note = '\n' + new_note
        except:
            pass
        check_both('echo "%s\nGood." >> %s' % (new_note, notef), False)
        check_both('%s %s' % (self.grader.editor, notef))

    def confirm(self):
        # Interactive yes/no gate; bare Enter counts as "yes".
        print '-----------------------------------------------'
        test = raw_input('OK [y/n]? ').lower() in ['y', '']
        self.pAssertTrue(test)

    def change_cgi(self, new_path):
        self.grader.cgi = new_path

    def liso_name(self):
        # NOTE(review): rebinds self.liso_name from method to string on first
        # call; tearDown later reads self.liso_name as the binary path.
        name = './lisod'
        # text = raw_input('liso name? ').strip()
        # if text: name = text
        self.liso_name = name
        return name

    def get_path(self):
        # Interactive override disabled; always returns None so callers
        # fall back to find_path().
        path = None
        # text = raw_input('path? ').strip()
        # if text: path = text
        return path

    def get_port(self):
        port = self.port
        # text = raw_input('port? ').strip()
        # if text: port = int(text)
        self.port = port
        print port
        return port

    def get_tls_port(self):
        tls_port = self.tls_port
        # text = raw_input('tls_port? ').strip()
        # if text: tls_port = int(text)
        self.tls_port = tls_port
        print tls_port
        return tls_port

    def find_path(self, name, tree, path='./', d=0):
        """Return the directory path containing entry *name* in *tree*.

        Breadth-first over the current level, then depth-first into
        subtrees (in reverse entry order); gives up below depth 15.
        """
        if d == 15: return None
        name = name.lower().strip()

        # bredth first...?
        for entry in tree:
            if entry.name.lower().strip() == name:
                return path

        # now check depth...?
        entries = [e for e in tree]
        for entry in reversed(entries):
            obj = self.repository[entry.oid]
            if isinstance(obj, Tree):
                obj = self.find_path(name, obj, os.path.join(path, entry.name),
                                     d + 1)
                if obj:
                    return obj
        return None

    def find_file(self, name, tree, d=0):
        """Return the Blob named *name* in *tree*, searched like find_path."""
        if d == 15: return None
        name = name.lower().strip()

        # bredth first...?
        for entry in tree:
            if entry.name.lower().strip() == name:
                resolved = self.repository[entry.oid]
                if not isinstance(resolved, Blob):
                    continue
                return resolved

        # now check depth...?
        entries = [e for e in tree]
        for entry in reversed(entries):
            obj = self.repository[entry.oid]
            if isinstance(obj, Tree):
                obj = self.find_file(name, obj, d + 1)
                if obj:
                    return obj
        return None

    def run_lisod(self, tree):
        """Build and launch the student's server in the background.

        Locates the Makefile directory in *tree*, runs make, raises the
        stack limit, and starts lisod with log/lock/www/cgi/key/cert args.
        """
        path = self.get_path()
        liso = self.liso_name()
        port = self.get_port()
        tls_port = self.get_tls_port()
        if not path: path = self.find_path('Makefile', tree)
        print 'switching to: %s' % path
        os.chdir(path)
        check_both('make clean', False, False)
        check_output('make')
        self.ran = True
        # Some submissions recurse deeply; remove the stack size cap.
        resource.setrlimit(resource.RLIMIT_STACK,
                           (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
        cmd = '%s %d %d %slisod.log %slisod.lock %s %s %s %s&' % (
            liso, port, tls_port, self.grader.tmp_dir, self.grader.tmp_dir,
            self.grader.www[:-1], self.grader.cgi, self.grader.priv_key,
            self.grader.cert)
        #cmd = 'nohup ' + cmd
        #cmd = cmd + " > /dev/null"
        print cmd
        self.pAssertEqual(0, os.system(cmd))
        return liso

    def git_clone(self, repourl):
        # Clone quietly; stdout/stderr discarded.
        with open('/dev/null', 'w') as f:
            self.pAssertEqual(
                0, check_call(['git', 'clone', repourl], stderr=f, stdout=f))

    def git_checkout(self, commit_hex):
        # Check out a specific commit quietly (detached HEAD is fine here).
        with open('/dev/null', 'w') as f:
            self.pAssertEqual(
                0,
                check_call(
                    ['git', 'checkout', '%s' % commit_hex], stdout=f,
                    stderr=f))

    def resolve_tag(self):
        """Return the commit tagged for this checkpoint.

        Students tag inconsistently, so try checkpoint-N, checkpoint_N,
        then checkpointN; annotated tags are peeled to the commit.
        """
        try:
            tag = self.repository.lookup_reference('refs/tags/checkpoint-%d' %
                                                   self.grader.cp_num)
        except KeyError:
            try:
                tag = self.repository.lookup_reference(
                    'refs/tags/checkpoint_%d' % self.grader.cp_num)
            except KeyError:
                tag = self.repository.lookup_reference(
                    'refs/tags/checkpoint%d' % self.grader.cp_num)
        #tag = self.repository.lookup_reference('refs/tags/regrade')
        commit = self.repository[tag.target]
        while isinstance(commit, Tag):
            commit = self.repository[commit.target]
        return commit

    def check_headers(self, response_type, headers, length_content, ext):
        """Validate required response headers for GET/HEAD/POST replies."""
        self.pAssertEqual(headers['Server'].lower(), 'liso/1.0')

        try:
            datetime.datetime.strptime(headers['Date'],
                                       '%a, %d %b %Y %H:%M:%S %Z')
        except KeyError:
            self.print_str('Bad Date header')
        except:
            self.print_str('Bad Date header: %s' % (headers['Date']))

        self.pAssertEqual(int(headers['Content-Length']), length_content)
        #self.pAssertEqual(headers['Connection'].lower(), 'close')

        if response_type == 'GET' or response_type == 'HEAD':
            header_set = set([
                'connection', 'content-length', 'date', 'last-modified',
                'server', 'content-type'
            ])
            # Every required header must be present (set difference empty).
            self.pAssertEqual(set(), header_set - set(headers.keys()))
            if headers['Content-Type'].lower() != MIME[ext]:
                self.print_str('MIME got %s expected %s' %
                               (headers['Content-Type'].lower(), MIME[ext]))
            # Accept the exact MIME type or a generic text/html fallback.
            self.pAssertTrue(
                headers['Content-Type'].lower() == MIME[ext]
                or headers['Content-Type'].lower() == MIME['.html'])

            try:
                datetime.datetime.strptime(headers['Last-Modified'],
                                           '%a, %d %b %Y %H:%M:%S %Z')
            except:
                self.print_str('Bad Last-Modified header: %s' %
                               (headers['Last-Modified']))
        elif response_type == 'POST':
            header_set = set(
                ['connection', 'content-length', 'date', 'server'])
            self.pAssertEqual(set(), header_set - set(headers.keys()))
        else:
            self.fail('Unsupported Response Type...')

    # test existence of tag in repo
    def test_tag_checkpoint(self):
        self.print_str('\n\n----- Testing Tag -----')
        # Raises KeyError (test failure) if the canonical tag is missing.
        self.repository.lookup_reference('refs/tags/checkpoint-%d' %
                                         self.grader.cp_num)

    # test turn in timestamp
    def test_timestamp(self):
        self.print_str('\n\n----- Testing Timestamp -----')
        commit = self.resolve_tag()
        self.print_str('ref/tags/checkpoint-%d: %s' %
                       (self.grader.cp_num, commit.hex))
        self.print_str('Due: %s' % self.grader.due_date)
        # Reconstruct the author's local commit time from UTC + offset.
        utctime = datetime.datetime.utcfromtimestamp(commit.commit_time)
        utcoffset = datetime.timedelta(minutes=commit.commit_time_offset)
        timestamp = utctime + utcoffset
        self.print_str('Timestamp: %s' % timestamp)
        timediff = timestamp - self.grader.due_date
        # Any positive difference means the submission was late.
        if timediff.days >= 0 and\
           timediff.seconds > 0 or\
           timediff.microseconds > 0:
            raise ValueError

    # test readme.txt file up to snuff
    def test_readme_file(self):
        self.print_str('\n\n----- Testing readme.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- readme.txt -----'
        readme = self.find_file('readme.txt', tree)
        print readme.data,
        self.confirm()
        self.edit_notes('README:')

    # test vulnerabilities.txt up to snuff
    def test_vulnerabilities_file(self):
        self.print_str('\n\n----- Testing vulnerabilities.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- vulnerabilities.txt -----'
        vulnerable = self.find_file('vulnerabilities.txt', tree)
        print vulnerable.data,
        self.confirm()
        self.edit_notes('VULNERABILITIES:')

    # test tests.txt up to snuff
    def test_tests_file(self):
        self.print_str('\n\n----- Testing tests.txt file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- tests.txt -----'
        tests = self.find_file('tests.txt', tree)
        print tests.data,
        self.confirm()
        self.edit_notes('TESTS:')

    # test Makefile up to snuff
    def test_Makefile_file(self):
        self.print_str('\n\n----- Testing Makefile file -----')
        commit = self.resolve_tag()
        tree = commit.tree
        print '\n----- Makefile -----'
        Makefile = self.find_file('Makefile', tree)
        print Makefile.data,
        self.confirm()
        self.edit_notes('MAKEFILE:')

    # test if source up to snuff
    def test_inspect_source(self):
        self.print_str('\n\n----- Inspect Source cod *.[c|h] -----')
        self.print_str(self.grader.source_reminder)
        # Drops the grader into an interactive shell to inspect the code.
        self.pAssertEqual(0, check_call(['bash']))
        self.confirm()
        self.edit_notes('SOURCE:')

    # tests if make properly creates lisod...
    def test_lisod_file(self):
        self.print_str('\n\n----- Testing make -----')
        commit = self.resolve_tag()
        path = self.get_path()
        if not path: path = self.find_path('Makefile', commit.tree)
        os.chdir(path)
        check_output('make')
        self.pAssertTrue(os.path.exists('./lisod'))

    # send all test files to their server
    # get output, give 3 second timeout
    # check sha's of output
    def test_replays(self):
        self.print_str('\n\n----- Testing Replays -----')
        commit = self.resolve_tag()
        self.run_lisod(commit.tree)
        time.sleep(3)
        replays_dir = os.path.join(self.grader.tmp_dir, 'replays')
        if not os.path.exists(replays_dir):
            os.makedirs(replays_dir)
        files = os.listdir(replays_dir)
        num_passed = 0
        num_files = 0
        for fname in files:
            basename, extension = os.path.splitext(fname)
            if extension == '.test':
                num_files += 1
                self.print_str('testing %s...' % fname)
                # NOTE(review): tmp_dir + 'replays' string concat assumes
                # tmp_dir ends with a slash — confirm against grader config.
                fname = os.path.join(self.grader.tmp_dir + 'replays', fname)
                outfile = os.path.join(self.grader.tmp_dir + 'replays',
                                       '%s_%s.out' % (basename, self.repo))
                # Replay the recorded request with a 1s idle timeout.
                command = 'ncat -i 1s localhost %d < %s > %s' % (
                    self.port, fname, outfile)

                check_both(command, False, False)
                # Compare SHA-256 of the server's reply to the expected .out.
                with open(
                        os.path.join(self.grader.tmp_dir + 'replays',
                                     basename + '.out')) as f:
                    with open(outfile) as f2:
                        outhash = hashlib.sha256(f.read()).hexdigest()
                        out2hash = hashlib.sha256(f2.read()).hexdigest()
                        if outhash == out2hash:
                            self.print_str('ok')
                            num_passed += 1
                        else:
                            self.print_str('failed')
                check_both('rm %s' % outfile)
        self.print_str('passed %d of %d' % (num_passed, num_files))
        self.pAssertEqual(num_passed, num_files)

    def test_HEAD_headers(self):
        self.print_str('----- Testing Headers -----')
        # URL template -> (expected sha256 of body, expected Content-Length).
        tests = {
            'http://127.0.0.1:%d/index.html':
            ('f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
             802),
            'http://127.0.0.1:%d/images/liso_header.png':
            ('abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786',
             17431),
            'http://127.0.0.1:%d/style.css':
            ('575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a',
             301)
        }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root, ext = os.path.splitext(test)
            response = requests.head(test % self.port, timeout=10.0)
            self.check_headers(response.request.method, response.headers,
                               tests[test][1], ext)

    def test_HEAD(self):
        self.print_str('----- Testing HEAD -----')
        tests = {
            'http://127.0.0.1:%d/index.html':
            ('f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
             802),
            'http://127.0.0.1:%d/images/liso_header.png':
            ('abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786',
             17431),
            'http://127.0.0.1:%d/style.css':
            ('575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a',
             301)
        }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root, ext = os.path.splitext(test)
            response = requests.head(test % self.port, timeout=10.0)
            # HEAD has no body; only the status code is checked here.
            contenthash = hashlib.sha256(response.content).hexdigest()
            self.pAssertEqual(200, response.status_code)

    def test_GET(self):
        self.print_str('----- Testing GET -----')
        # URL template -> expected sha256 of the response body.
        tests = {
            'http://127.0.0.1:%d/index.html':
            'f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
            'http://127.0.0.1:%d/images/liso_header.png':
            'abf1a740b8951ae46212eb0b61a20c403c92b45ed447fe1143264c637c2e0786',
            'http://127.0.0.1:%d/style.css':
            '575150c0258a3016223dd99bd46e203a820eef4f6f5486f7789eb7076e46736a'
        }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root, ext = os.path.splitext(test)
            response = requests.get(test % self.port, timeout=10.0)
            contenthash = hashlib.sha256(response.content).hexdigest()
            self.pAssertEqual(200, response.status_code)
            self.pAssertEqual(contenthash, tests[test])

    def test_POST(self):
        self.print_str('----- Testing POST -----')
        tests = {
            'http://127.0.0.1:%d/index.html':
            'f5cacdcb48b7d85ff48da4653f8bf8a7c94fb8fb43407a8e82322302ab13becd',
        }
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        for test in tests:
            root, ext = os.path.splitext(test)
            # for checkpoint 2, this should time out; we told them to swallow the data and ignore
            try:
                response = requests.post(test % self.port,
                                         data='dummy data',
                                         timeout=3.0)
            #except requests.exceptions.Timeout:
            except requests.exceptions.RequestException:
                print 'timeout'
                continue
            except socket.timeout:
                print 'socket.timeout'
                continue

            # if they do return something, make sure it's OK
            self.pAssertEqual(200, response.status_code)

    def test_bw(self):
        # Bandwidth check: curl a large file; timings land in the results file.
        print '(----- Testing BW -----'
        check_output('echo "----- Testing BW ----" >> %s' %
                     self.grader.results)
        commit = self.resolve_tag()
        self.git_checkout(commit.hex)
        name = self.run_lisod(commit.tree)
        time.sleep(1)
        self.pAssertEqual(
            0,
            os.system(
                'curl -m 10 -o /dev/null http://127.0.0.1:%d/big.html 2>> %s' %
                (self.port, self.grader.results)))

    def tearDown(self):
        # Remove lock file, delete the clone, flush the report, kill the server.
        #check_both('rm ' + self.grader.tmp_dir + 'lisod.log', False, False)
        check_both('rm ' + self.grader.tmp_dir + 'lisod.lock', False, False)
        os.chdir(self.grader.tmp_dir)
        shutil.rmtree(self.repo)
        if sys.exc_info() == (None, None, None):  #test succeeded
            self.out_string += '\nok'
        else:
            self.out_string += '\nfailed'
        if self.out_string:
            check_both('echo "%s" >> %s' %
                       (self.out_string, self.grader.results))
        if self.ran:
            # self.liso_name was rebound to the binary path by liso_name().
            print 'trying "killall -9 %s"' % os.path.basename(self.liso_name)
            check_both('killall -9 %s' % os.path.basename(self.liso_name),
                       True, False)
Ejemplo n.º 21
0
class GitMixin(object):
    """Mixin adding read-only git statistics to a model with a local clone.

    Expects the host class to provide ``self.user``, ``self.name`` and
    ``self.git_user``; the backing repository is opened from disk in
    ``__init__``.
    """

    # Matches refs/tags/<name> and refs/remotes/<name>, capturing kind + name.
    tag_or_remote_regex = re.compile('^refs/(tags|remotes)/(.*)')

    def __init__(self):
        where = GitOperations.get_repository_location(self.user, self.name)
        self.ondisk = Repository(where)

    def refresh(self):
        """Fetch all remotes and fast-forward local master to origin/master."""
        creds = GitOperations.get_credentials(self.git_user, self.user)
        for remote in self.ondisk.remotes:
            remote.credentials = creds
            remote.fetch()
        # update current reference
        master_ref = self.ondisk.lookup_reference('refs/heads/master')
        remote_ref = self.ondisk.lookup_reference('refs/remotes/origin/master')
        master_ref.set_target(remote_ref.target)

    def filter_references(self, regex):
        # Full reference names matching *regex*.
        return [ref for ref in self.ondisk.listall_references()
                if regex.match(ref)]

    def get_commit_time(self, name):
        """Return the commit time for a revspec, peeling annotated tags."""
        ref = self.ondisk.revparse_single(name)
        if isinstance(ref, Tag):
            return ref.get_object().commit_time
        if isinstance(ref, Commit):
            return ref.commit_time
        raise GitException('invalid reference: commit time could not be found.') # pragma: no cover

    def get_latest_refs(self, count=None):
        """Yield (kind, name, commit_time) tuples, newest first.

        *count* limits the result; with no count the full (lazy) map
        is returned.
        """
        info = self.filter_references(GitMixin.tag_or_remote_regex)
        refs = list(zip(info, map(self.get_commit_time, info)))
        refs.sort(key=itemgetter(1), reverse=True)
        def ref_info(info):
            (ref, commit_time) = info
            what, name = GitMixin.tag_or_remote_regex.findall(ref)[0]
            return (what, name, commit_time)
        refs = map(ref_info, refs)
        if not count:
            return refs
        return islice(refs, count)

    def filter_commits(self, flags=0):
        # Lazily yields only commits authored by one of the user's emails.
        all_commits = self.ondisk.walk(self.ondisk.head.target, flags)
        emails = [ue.email for ue in self.user.emails.all()]
        return filter(lambda commit: commit.author.email in emails, all_commits)

    def get_commits(self, count=None):
        """Return the user's commits in topological order, newest first."""
        all_commits = self.filter_commits(GIT_SORT_TOPOLOGICAL)
        if not count:
            return all_commits
        return islice(all_commits, count)

    def get_commit_count(self):
        return len(list(self.filter_commits()))

    def get_shorthand_of_branch(self, branch):
        # '(none)' when the branch does not exist locally.
        commit = self.ondisk.lookup_branch(branch)
        if commit:
            return commit.shorthand
        return '(none)'

    def get_sha1_of_branch(self, branch):
        # Abbreviated (6-char) commit id of the branch tip, or '(none)'.
        commit = self.ondisk.lookup_branch(branch)
        if commit:
            return str(commit.get_object().id)[:6]
        return '(none)'

    def get_numstat(self, commit):
        """Return (files_changed, additions, deletions) for *commit*."""
        diff = None
        try:
            previous_commit = self.ondisk.revparse_single(str(commit.id) + '^')
            diff = self.ondisk.diff(previous_commit, commit)
        except KeyError:
            # likely we hit the very first commit.
            diff = commit.tree.diff_to_tree(swap=True)
        additions, deletions = 0, 0
        for patch in diff:
            additions += patch.additions
            deletions += patch.deletions
        return (len(diff), additions, deletions)

    def get_first_updated(self):
        # Commit time of the oldest commit reachable from HEAD.
        all_commits = self.ondisk.walk(self.ondisk.head.target,
                                       GIT_SORT_TIME | GIT_SORT_REVERSE)
        first_commit = next(all_commits)
        return first_commit.commit_time

    def get_last_updated(self):
        # Commit time of the newest commit reachable from HEAD.
        all_commits = self.ondisk.walk(self.ondisk.head.target,
                                       GIT_SORT_TIME)
        last_commit = next(all_commits)
        return last_commit.commit_time

    def get_file_count(self):
        # Number of files in the HEAD tree (via diff against empty tree).
        diff = self.ondisk.head.get_object().tree.diff_to_tree()
        return len([patch.old_file_path for patch in diff])

    def get_line_count(self):
        # Total lines in the HEAD tree (deletions vs the empty tree).
        diff = self.ondisk.head.get_object().tree.diff_to_tree()
        return sum([patch.deletions for patch in diff])

    def get_author_count(self):
        commits = self.filter_commits()
        return len(set([commit.author.email for commit in commits]))

    def commits_between(self, start, end):
        """Commits with start <= commit_time <= end, oldest first."""
        all_commits = self.filter_commits(GIT_SORT_TIME | GIT_SORT_REVERSE)
        starting = dropwhile(lambda obj: obj.commit_time < start, all_commits)
        return takewhile(lambda obj: obj.commit_time <= end, starting)

    @staticmethod
    def by_day(obj):
        # we want to group our commit times by the day. so convert
        # timestamp -> date -> timestamp
        new_date = date.fromtimestamp(obj.commit_time)
        new_date += timedelta(days=1)
        return timegm(new_date.timetuple())

    @staticmethod
    def group_by(series):
        # *series* must already be ordered by commit_time for groupby to work.
        result = groupby(series, GitMixin.by_day)
        return [{'date': commit_date,
                 'value': len(list(commits))}
                for commit_date, commits in result]

    def histogram(self, start, end):
        """Per-day commit counts between *start* and *end* timestamps."""
        series = self.commits_between(start, end)
        return GitMixin.group_by(series)
Ejemplo n.º 22
0
class GitRepo:
    """A class that manages a git repository.

    This class enables versiong via git for a repository.
    You can stage and commit files and checkout different commits of the repository.
    """

    # NOTE(review): these are shared class attributes; pathspec in particular
    # is a mutable default shared across instances — confirm intent.
    path = ''
    pathspec = []
    repo = None
    callback = None
    author_name = 'QuitStore'
    author_email = '*****@*****.**'
    gcProcess = None

    def __init__(self, path, origin=None, gc=False):
        """Initialize a new repository from an existing directory.

        Args:
            path: A string containing the path to the repository.
            origin: The remote URL where to clone and fetch from and push to
            gc: If True, spawn "git gc --auto" after each commit.
        """
        logger = logging.getLogger('quit.core.GitRepo')
        logger.debug('GitRepo, init, Create an instance of GitStore')
        self.path = path
        self.gc = gc

        if not exists(path):
            try:
                makedirs(path)
            except OSError as e:
                raise Exception('Can\'t create path in filesystem:', path, e)

        # Opening fails silently when the directory is not yet a repository;
        # self.repo stays None and is created/cloned below.
        try:
            self.repo = Repository(path)
        except KeyError:
            pass
        except AttributeError:
            pass

        if origin:
            self.callback = QuitRemoteCallbacks()

        if self.repo:
            if self.repo.is_bare:
                raise QuitGitRepoError('Bare repositories not supported, yet')

            if origin:
                # set remote
                self.addRemote('origin', origin)
        else:
            if origin:
                # clone
                self.repo = self.cloneRepository(origin, path, self.callback)
            else:
                self.repo = init_repository(path=path, bare=False)

    def cloneRepository(self, origin, path, callback):
        """Clone *origin* into *path*; wrap any failure in QuitGitRepoError."""
        try:
            repo = clone_repository(url=origin,
                                    path=path,
                                    bare=False,
                                    callbacks=callback)
            return repo
        except Exception as e:
            raise QuitGitRepoError(
                "Could not clone from: {} origin. {}".format(origin, e))

    def addall(self):
        """Add all (newly created|changed) files to index."""
        self.repo.index.read()
        self.repo.index.add_all(self.pathspec)
        self.repo.index.write()

    def addfile(self, filename):
        """Add a file to the index.

        Args:
            filename: A string containing the path to the file.
        """
        index = self.repo.index
        index.read()

        try:
            index.add(filename)
            index.write()
        except Exception as e:
            # Failure is logged, not raised.
            logger.info(
                "GitRepo, addfile, Could not add file  {}.".format(filename))
            logger.debug(e)

    def addRemote(self, name, url):
        """Add a remote.

        Args:
            name: A string containing the name of the remote.
            url: A string containing the url to the remote.
        """
        try:
            self.repo.remotes.create(name, url)
            logger.info("Successfully added remote: {} - {}".format(name, url))
        except Exception as e:
            logger.info("Could not add remote: {} - {}".format(name, url))
            logger.debug(e)

        # Set urls even if create() failed (e.g. remote already existed).
        try:
            self.repo.remotes.set_push_url(name, url)
            self.repo.remotes.set_url(name, url)
        except Exception as e:
            logger.info("Could not set push/fetch urls: {} - {}".format(
                name, url))
            logger.debug(e)

    def checkout(self, commitid):
        """Checkout a commit by a commit id.

        Args:
            commitid: A string cotaining a commitid.
        """
        try:
            commit = self.repo.revparse_single(commitid)
            # Detach HEAD at the commit and hard-reset the worktree to it.
            self.repo.set_head(commit.oid)
            self.repo.reset(commit.oid, GIT_RESET_HARD)
            logger.info("Checked out commit: {}".format(commitid))
        except Exception as e:
            logger.info("Could not check out commit: {}".format(commitid))
            logger.debug(e)

    def commit(self, message=None):
        """Commit staged files.

        Args:
            message: A string for the commit message.
        Raises:
            Exception: If no files in staging area.
        """
        if self.isstagingareaclean():
            # nothing to commit
            return

        index = self.repo.index
        index.read()
        tree = index.write_tree()

        try:
            author = Signature(self.author_name, self.author_email)
            comitter = Signature(self.author_name, self.author_email)

            # No references yet means this is the repository's first commit.
            if len(self.repo.listall_reference_objects()) == 0:
                # Initial Commit
                if message is None:
                    message = 'Initial Commit from QuitStore'
                self.repo.create_commit('HEAD', author, comitter, message,
                                        tree, [])
            else:
                if message is None:
                    message = 'New Commit from QuitStore'
                self.repo.create_commit('HEAD', author, comitter, message,
                                        tree,
                                        [self.repo.head.get_object().hex])
            logger.info('Updates commited')
        except Exception as e:
            logger.info('Nothing to commit')
            logger.debug(e)

        if self.gc:
            self.garbagecollection()

    def commitexists(self, commitid):
        """Check if a commit id is part of the repository history.

        Args:
            commitid: String of a Git commit id.
        Returns:
            True, if commitid is part of commit log
            False, else.
        """
        if commitid in self.getids():
            return True
        else:
            return False

    def garbagecollection(self):
        """Start garbage collection.

        Args:
            commitid: A string cotaining a commitid.
        """
        try:
            # Check if the garbage collection process is still running
            if self.gcProcess is None or self.gcProcess.poll() is not None:
                # Start garbage collection with "--auto" option,
                # which imidietly terminates, if it is not necessary
                self.gcProcess = Popen(["git", "gc", "--auto", "--quiet"],
                                       cwd=self.path)
                logger.debug('Spawn garbage collection')
        except Exception as e:
            logger.debug('Git garbage collection failed to spawn')
            logger.debug(e)

    def getpath(self):
        """Return the path of the git repository.

        Returns:
            A string containing the path to the directory of git repo
        """
        return self.path

    def getcommits(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        commits = []
        if len(self.repo.listall_reference_objects()) > 0:
            # Walk history oldest-first from HEAD.
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                commits.append({
                    'id':
                    str(commit.oid),
                    'message':
                    str(commit.message),
                    'commit_date':
                    datetime.fromtimestamp(
                        commit.commit_time).strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'author_name':
                    commit.author.name,
                    'author_email':
                    commit.author.email,
                    'parents': [c.hex for c in commit.parents],
                })
        return commits

    def getids(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        ids = []
        if len(self.repo.listall_reference_objects()) > 0:
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                ids.append(str(commit.oid))
        return ids

    def isgarbagecollectionon(self):
        """Return if gc is activated or not.

        Returns:
            True, if activated
            False, if not
        """
        return self.gc

    def isstagingareaclean(self):
        """Check if staging area is clean.

        Returns:
            True, if staginarea is clean
            False, else.
        """
        status = self.repo.status()

        for filepath, flags in status.items():
            if flags != GIT_STATUS_CURRENT:
                return False

        return True

    def pull(self, remote='origin', branch='master'):
        """Pull if possible.

        Return:
            True: If successful.
            False: If merge not possible or no updates from remote.
        """
        try:
            self.repo.remotes[remote].fetch()
        except Exception as e:
            # NOTE(review): execution continues after a failed fetch; the
            # lookup_reference below may then raise — confirm intended.
            logger.info("Can not pull:  Remote {} not found.".format(remote))
            logger.debug(e)

        ref = 'refs/remotes/' + remote + '/' + branch
        remoteid = self.repo.lookup_reference(ref).target
        analysis, _ = self.repo.merge_analysis(remoteid)

        if analysis & GIT_MERGE_ANALYSIS_UP_TO_DATE:
            # Already up-to-date
            pass
        elif analysis & GIT_MERGE_ANALYSIS_FASTFORWARD:
            # fastforward
            self.repo.checkout_tree(self.repo.get(remoteid))
            master_ref = self.repo.lookup_reference('refs/heads/master')
            master_ref.set_target(remoteid)
            self.repo.head.set_target(remoteid)
        elif analysis & GIT_MERGE_ANALYSIS_NORMAL:
            # True merge: create a two-parent merge commit.
            self.repo.merge(remoteid)
            tree = self.repo.index.write_tree()
            msg = 'Merge from ' + remote + ' ' + branch
            author = Signature(self.author_name, self.author_email)
            comitter = Signature(self.author_name, self.author_email)
            self.repo.create_commit('HEAD', author, comitter, msg, tree,
                                    [self.repo.head.target, remoteid])
            self.repo.state_cleanup()
        else:
            logger.debug('Can not pull. Unknown merge analysis result')

    def push(self, remote='origin', branch='master'):
        """Push if possible.

        Return:
            True: If successful.
            False: If diverged or nothing to push.
        """
        ref = ['refs/heads/' + branch]

        try:
            remo = self.repo.remotes[remote]
        except Exception as e:
            logger.info(
                "Can not push. Remote: {} does not exist.".format(remote))
            logger.debug(e)
            return

        try:
            remo.push(ref, callbacks=self.callback)
        except Exception as e:
            logger.info("Can not push to {} with ref {}".format(
                remote, str(ref)))
            logger.debug(e)

    def getRemotes(self):
        """Return a mapping of remote name to [fetch_url, push_url]."""
        remotes = {}

        try:
            for remote in self.repo.remotes:
                remotes[remote.name] = [remote.url, remote.push_url]
        except Exception as e:
            logger.info('No remotes found.')
            logger.debug(e)
            return {}

        return remotes
Ejemplo n.º 23
0
class GitBareBackend(object):
    """Storage backend over a bare git repository.

    Legacy Python 2 code (``print`` statements, ``raise X, err`` syntax) using
    an old pygit2 API.  The data lives in ``<path>/database`` and is accessed
    through pygit2, with a few operations shelling out to git itself.
    """

    # Count of do_transaction() calls made on this backend instance.
    nb_transactions = 0

    def __init__(self, path):
        """Open the bare repository under ``<path>/database`` and verify the
        git committer configuration is present.
        """
        self.path = abspath(path) + '/'
        # Open database
        self.path_data = '%s/database/' % self.path
        if not lfs.is_folder(self.path_data):
            error = '"%s" should be a folder, but it is not' % path
            raise ValueError, error
        # Open repository
        self.repo = Repository(self.path_data)
        # Read index
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            self.repo.index.read_tree(tree.id)
        except:
            # Likely an empty repository with no HEAD yet; keep the empty
            # in-memory index in that case.
            pass
        # Check git commiter
        try:
            _, _ = self.username, self.useremail
        except:
            print '========================================='
            print 'ERROR: Please configure GIT commiter via'
            print ' $ git config --global user.name'
            print ' $ git config --global user.email'
            print '========================================='
            raise

    @classmethod
    def init_backend(cls, path, init=False, soft=False):
        """Create a new bare repository at ``<path>/database``.

        The ``init`` and ``soft`` flags are currently unused here.
        """
        init_repository('{0}/database'.format(path), bare=True)

    #######################################################################
    # Internal utility functions
    #######################################################################
    def _call(self, command):
        """Interface to cal git.git for functions not yet implemented using
        libgit2.

        Runs *command* inside the database folder and returns its stdout;
        raises EnvironmentError with (returncode, stderr) on failure.
        """
        popen = Popen(command, stdout=PIPE, stderr=PIPE, cwd=self.path_data)
        stdoutdata, stderrdata = popen.communicate()
        if popen.returncode != 0:
            raise EnvironmentError, (popen.returncode, stderrdata)
        return stdoutdata

    @lazy
    def username(self):
        """Committer name from ``git config user.name`` (cached by @lazy)."""
        cmd = ['git', 'config', '--get', 'user.name']
        try:
            username = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.name'")
        return username

    @lazy
    def useremail(self):
        """Committer e-mail from ``git config user.email`` (cached by @lazy)."""
        cmd = ['git', 'config', '--get', 'user.email']
        try:
            useremail = self._call(cmd).rstrip()
        except EnvironmentError:
            raise ValueError(
                "Please configure 'git config --global user.email'")
        return useremail

    def _resolve_reference(self, reference):
        """This method returns the SHA the given reference points to. For now
        only HEAD is supported.

        FIXME This is quick & dirty. TODO Implement references in pygit2 and
        use them here.
        """
        # Case 1: SHA
        if len(reference) == 40:
            return reference

        # Case 2: reference
        reference = self.repo.lookup_reference(reference)
        try:
            reference = reference.resolve()
        except KeyError:
            # Unborn or broken reference (e.g. HEAD in an empty repository).
            return None

        return reference.target

    def normalize_key(self, path, __root=None):
        """Resolve *path* to a '/'-joined key, denying access to '.git'."""
        # Performance is critical so assume the path is already relative to
        # the repository.
        # NOTE(review): __root defaults to None but is used unconditionally;
        # callers appear to always pass a resolver object — confirm.
        key = __root.resolve(path)
        if key and key[0] == '.git':
            err = "bad '{0}' path, access to the '.git' folder is denied"
            raise ValueError(err.format(path))
        return '/'.join(key)

    def handler_exists(self, key):
        """Return True if *key* exists in the tree at HEAD."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        try:
            tree[key]
        except:
            return False
        return True

    def get_handler_names(self, key):
        """Yield the entry names under *key*; yields a single None when the
        key is missing or names a blob (non-folder).
        """
        try:
            tree = self.repo.head.peel(GIT_OBJ_TREE)
            if key:
                tree_entry = tree[key]
                if tree_entry.type == 'blob':
                    raise ValueError
                tree = self.repo[tree_entry.id]
        except:
            yield None
        else:
            for item in tree:
                yield item.name

    def get_handler_data(self, key):
        """Return the raw blob contents stored at *key* (HEAD revision)."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        tree_entry = tree[key]
        blob = self.repo[tree_entry.id]
        return blob.data

    def get_handler_mimetype(self, key):
        """Guess the MIME type of the data stored at *key*."""
        data = self.get_handler_data(key)
        return magic_from_buffer(data)

    def handler_is_file(self, key):
        """A key is a file iff it is not a folder."""
        return not self.handler_is_folder(key)

    def handler_is_folder(self, key):
        """Return True if *key* names a tree (the empty key is the root)."""
        repository = self.repo
        if key == '':
            return True
        else:
            tree = repository.head.peel(GIT_OBJ_TREE)
            tree_entry = tree[key]
        return tree_entry.type == 'tree'

    def get_handler_mtime(self, key):
        # FIXME
        # Always "now": per-key modification times are not tracked yet.
        return datetime.utcnow().replace(tzinfo=fixed_offset(0))

    def traverse_resources(self):
        """Yield the root resource plus one resource per '*.metadata' entry."""
        tree = self.repo.head.peel(GIT_OBJ_TREE)
        yield self.get_resource('/')
        for name in self.get_names(tree):
            if name[-9:] == '.metadata' and name != '.metadata':
                yield self.get_resource('/' + name[:-9])

    def get_names(self, tree, path=''):
        """Recursively yield the '/'-joined paths of all entries below *tree*."""
        for entry in tree:
            base_path = '{0}/{1}'.format(path, entry.name)
            yield base_path
            if entry.filemode == GIT_FILEMODE_TREE:
                sub_tree = self.repo.get(entry.hex)
                for x in self.get_names(sub_tree, base_path):
                    yield x

    def do_transaction(self, commit_message, data, added, changed, removed,
                       handlers):
        """Stage the added/changed/removed handlers into the index and commit
        the resulting tree.
        """
        self.nb_transactions += 1
        # Get informations
        git_author, git_date, git_msg, docs_to_index, docs_to_unindex = data
        git_msg = commit_message or git_msg or 'no comment'
        # List of Changed
        added_and_changed = list(added) + list(changed)
        # Build the tree from index
        index = self.repo.index
        for key in added_and_changed:
            handler = handlers.get(key)
            blob_id = self.repo.create_blob(handler.to_str())
            entry = IndexEntry(key, blob_id, GIT_FILEMODE_BLOB_EXECUTABLE)
            index.add(entry)
        for key in removed:
            index.remove(key)
        git_tree = index.write_tree()
        # Commit
        self.git_commit(git_msg, git_author, git_date, tree=git_tree)

    def git_commit(self, message, author=None, date=None, tree=None):
        """Equivalent to 'git commit', we must give the message and we can
        also give the author and date.
        """
        # Tree
        if tree is None:
            #tree = self.index.write_tree()
            raise ValueError('Please give me a tree')

        # Parent
        parent = self._resolve_reference('HEAD')
        parents = [parent] if parent else []

        # Committer
        when_time = time.time()
        when_offset = -(time.altzone if time.daylight else time.timezone)
        when_offset = when_offset / 60

        name = self.username
        email = self.useremail
        committer = Signature(name, email, when_time, when_offset)

        # Author
        if author is None:
            author = (name, email)

        if date:
            if date.tzinfo:
                from pytz import utc
                when_time = date.astimezone(utc)  # To UTC
                when_time = when_time.timetuple()  # As struct_time
                when_time = timegm(when_time)  # To unix time
                when_offset = date.utcoffset().seconds / 60
            else:
                err = "Worktree.git_commit doesn't support naive datatime yet"
                raise NotImplementedError, err

        author = Signature(author[0], author[1], when_time, when_offset)

        # Create the commit
        return self.repo.create_commit('HEAD', author, committer, message,
                                       tree, parents)

    def abort_transaction(self):
        """Abort the current transaction (currently a no-op)."""
        # TODO: Remove created blobs
        pass
Ejemplo n.º 24
0
class DictRepository(object):
    """The :class:`DictRepository <DictRepository>` object.

    :param repo_or_path:
        The path to a repository, or an existing pygit2.Repository object.
        If it is a path that does not exist, a new bare git repository will
        be initialized there.  If it is a path that does exist, then the
        directory will be used as a bare git repository.
    :type repo_or_path: string or pygit2.Repository
    """

    def __init__(self, repo_or_path=None):

        self._default_author = get_default_author()
        if isinstance(repo_or_path, Repository):
            self._repo = repo_or_path
        elif os.path.isdir(repo_or_path):
            self._repo = Repository(repo_or_path)
        else:
            self._repo = init_repository(repo_or_path, True)  # bare repo

    def _key_to_ref(self, key):
        """Map a dict key to the git reference tracking its history."""
        return "refs/%s/HEAD" % key

    def get_commit_oid_for_key(self, key):
        """Return the oid of the latest commit for *key*."""
        return self._repo[self._repo.lookup_reference(self._key_to_ref(key)).oid].oid

    def get_raw_dict_for_commit_oid(self, commit_oid):
        """Load and JSON-decode the dict stored in the given commit."""
        return json.loads(self._repo[self._repo[commit_oid].tree[DATA].oid].data)

    def get_parent_oids_for_commit_oid(self, commit_oid):
        """Return the oids of the parent commits of the given commit."""
        return [parent.oid for parent in self._repo[commit_oid].parents]

    def raw_commit(self, key, raw_dict, author, committer, message, parents):
        """Commit a dict to this :class:`DictRepository <DictRepository>`.
        It is recommended that you use the :class:`GitDict <GitDict>` commit
        method instead.

        :param raw_dict: the data to commit.
        :type raw_dict: dict
        :param author:
            The author of the commit.  If None, will be replaced with default.
        :type author: pygit2.Signature
        :param committer:
            The committer of this commit. If None, will be replaced with author.
        :type committer: pygit2.Signature
        :param message: The commit message.
        :type message: string
        :param parents:
            A list of 20-byte object IDs of parent commits.  An empty list
            means this is the first commit.

        :return: The oid of the new commit.
        :rtype: 20 bytes
        """
        if not isinstance(raw_dict, dict):
            raise ValueError("%s is not a dict" % raw_dict)

        author = author or self._default_author.signature()
        committer = committer or author

        blob_id = self._repo.write(GIT_OBJ_BLOB, json.dumps(raw_dict))

        # TreeBuilder doesn't support inserting into trees, so we roll our own
        tree_id = self._repo.write(GIT_OBJ_TREE, "100644 %s\x00%s" % (DATA, blob_id))

        return self._repo.create_commit(self._key_to_ref(key), author, committer, message, tree_id, parents)

    def create(self, key, dict=None, autocommit=False, message="first commit", author=None, committer=None):
        """Create a new :class:`GitDict <GitDict>`

        :param key: The key of the new :class:`GitDict <GitDict>`
        :type key: :class:`GitDict <GitDict>`
        :param dict: (optional) The value of the dict.  Defaults to empty.
        :type dict: dict
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean
        :param message:
            (optional) Message for first commit.  Defaults to "first commit".
        :type message: string
        :param author:
            (optional) The signature for the author of the first commit.
            Defaults to global author.
        :type author: pygit2.Signature
        :param committer:
            (optional) The signature for the committer of the first commit.
            Defaults to author.
        :type author: pygit2.Signature

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        """
        # BUG FIX: the original used a mutable default argument (dict={});
        # use a None sentinel instead.  The parameter name shadows the
        # builtin but is kept for backward compatibility with keyword calls.
        if dict is None:
            dict = {}
        self.raw_commit(key, dict, author, committer, message, [])
        return self.get(key, autocommit=autocommit)

    def has(self, key):
        """Determine whether there is an entry for key in this repository.

        :param key: The key to check
        :type key: string

        :returns: whether there is an entry
        :rtype: boolean
        """
        try:
            self._repo.lookup_reference(self._key_to_ref(key))
            return True
        except KeyError:
            return False

    def get(self, key, autocommit=False):
        """Obtain the :class:`GitDict <GitDict>` for a key.

        :param key: The key to look up.
        :type key: string
        :param autocommit:
            (optional) Whether the :class:`GitDict <GitDict>` should
            automatically commit. Defaults to false.
        :type autocommit: boolean

        :returns: the GitDict
        :rtype: :class:`GitDict <GitDict>`
        :raises: KeyError if there is no entry for key
        """
        return GitDict(self, key, autocommit=autocommit)

    def fast_forward(self, from_dict, to_dict):
        """Fast forward a :class:`GitDict <GitDict>`.

        :param from_dict: the :class:`GitDict <GitDict>` to fast forward.
        :type from_dict: :class:`GitDict <GitDict>`
        :param to_dict: the :class:`GitDict <GitDict>`to fast forward to.
        :type to_dict: :class:`GitDict <GitDict>`
        """
        # Re-point from_dict's ref at to_dict's latest commit by delete+create.
        from_ref = self._key_to_ref(from_dict.key)
        self._repo.lookup_reference(from_ref).delete()
        self._repo.create_reference(from_ref, self.get_commit_oid_for_key(to_dict.key))

    def clone(self, original, key):
        """Clone a :class:`GitDict <GitDict>`.

        :param original: the :class:`GitDict <GitDict>` to clone
        :type original: :class:`GitDict <GitDict>`
        :param key: where to clone to
        :type key: string
        :raises: ValueError if to_key already exists.
        """
        try:
            self._repo.create_reference(self._key_to_ref(key), self.get_commit_oid_for_key(original.key))
            return self.get(key, autocommit=original.autocommit)
        except GitError:
            raise ValueError("Cannot clone to %s, there is already a dict there." % key)
Ejemplo n.º 25
0
class GitDict():
    '''
    A python dict, stored in git so it can be larger than memory and yet accessed for
    reading and writing efficiently.  Keys and values are git objects.  The collection of
    key-value pairs is stored in a one level hierarchy of pages (git objects) that are
    indexed by a page table (also a git object).
    '''
    def __init__(self,
                 dir_,
                 name,
                 log=None,
                 do_create=False,
                 refs_ns='tags',
                 h_order=3):
        """Open (or, with *do_create*, initialize) the backing bare repo.

        :param dir_: path of the bare git repository.
        :param name: base name for the refs holding size and page table.
        :param log: logging callable; defaults to print.
        :param do_create: create the repository when it does not exist.
        :param refs_ns: refs namespace for the bookkeeping references.
        :param h_order: number of 7-bit levels used for page addressing.
        """
        self.dir_ = dir_
        self.name = name
        self.name_size = name + '.size'
        self.name_items = name + '.items'
        self.log = log or print
        self.refs_ns = refs_ns
        self.h_order = h_order
        # Number of leading hex digits of an oid used as the hashed key.
        self.h_key_len = ((7 * h_order // 8) + 1) * 2
        try:
            self.repo = Repository(dir_)
        except GitError as e:
            if do_create:
                self.repo = init_repository(dir_, bare=True)
            else:
                raise e
        # Blob representing "no value".
        self.none = self.repo.write(GIT_OBJ_BLOB, '')
        self._init()

    def __repr__(self):
        return f'GitDict("{self.dir_}", "{self.name}")'

    def _lookup_reference(self, name):
        """Return the bookkeeping reference *name* in this dict's namespace."""
        return self.repo.lookup_reference(f'refs/{self.refs_ns}/{name}')

    def _set_reference(self, name, target):
        """Point reference *name* at *target*, creating it if missing."""
        try:
            self._lookup_reference(name).set_target(target)
        except KeyError:
            self.repo.references.create(f'refs/{self.refs_ns}/{name}', target)

    def _init(self):
        # NOTE(review): called on every open, resetting the stored size to 0
        # and installing a fresh page table — confirm this is intended for
        # pre-existing repositories.
        self._set_reference(self.name_size, self.repo.write(GIT_OBJ_BLOB, '0'))
        self.items_table = PageTable()

    @property
    def items_table(self):
        """The current page table, reconstructed from its blob."""
        return PageTable(self.repo[self._lookup_reference(
            self.name_items).target].data)

    @items_table.setter
    def items_table(self, table):
        self._set_reference(self.name_items,
                            self.repo.write(GIT_OBJ_BLOB, table.data))

    def __len__(self):
        # BUG FIX: repo[oid] yields a blob object, not a number; the size is
        # stored as a decimal string in the blob's data (see _inc_size and
        # the items_table getter, which also read .data).
        return int(self.repo[self._lookup_reference(self.name_size).target].data)

    def _inc_size(self):
        """Persist len(self) + 1 as the new element count."""
        new_size = self.repo.write(GIT_OBJ_BLOB, str(len(self) + 1))
        self._set_reference(self.name_size, new_size)

    def __contains__(self, key):
        return self.get(key) is not None

    def get(self, key, default=None):
        """dict.get-style lookup: *default* instead of KeyError on a miss."""
        try:
            return self[key]
        except KeyError:
            return default

    def _key_oid_and_h_key(self, oid):
        """Return (raw oid bytes, integer hash key) for a key blob oid."""
        return oid.raw, int(oid.hex[:self.h_key_len], 16)

    @staticmethod
    def _entry_no(h_key, level):
        """Extract the 7-bit page-table slot for *h_key* at *level*."""
        return (h_key >> (7 * level)) & 127

    def _get_page(self, h_key, table=None):
        """Load the item page addressed by *h_key* (empty page if absent)."""
        table = table or self.items_table
        entry_no = self._entry_no(h_key, 0)
        try:
            return ItemPage(self.repo[Oid(table[entry_no])].data)
        except TypeError:
            return ItemPage()

    def __getitem__(self, key):
        key_oid, h_key = self._key_oid_and_h_key(pyghash(key))
        page = self._get_page(h_key)
        value_oid = page[key_oid]
        return self.repo[Oid(value_oid)].data

    def __setitem__(self, key, value):
        key_oid, h_key = self._key_oid_and_h_key(
            self.repo.write(GIT_OBJ_BLOB, key))
        value_oid = self.repo.write(GIT_OBJ_BLOB, value).raw

        table = self.items_table
        page = self._get_page(h_key, table)
        # Existing keys are left untouched: this store is insert-only and
        # does not overwrite values.
        if key_oid in page:
            return

        page[key_oid] = value_oid
        page_oid = self.repo.write(GIT_OBJ_BLOB, page.data).raw
        entry_no = self._entry_no(h_key, 0)
        table[entry_no] = page_oid
        self.items_table = table
        self._inc_size()

    def report(self):
        """Log a one-line summary of this dict."""
        self.log(f'{repr(self)}: contains {len(self)} elements.')

    def keys(self):
        """Return the data of every key blob, across all occupied pages."""
        table = self.items_table
        keys = []
        for k in range(table.TABLE_SIZE):
            if table[k] != table.EMPTY_PAGE_ID:
                page = ItemPage(self.repo[Oid(table[k])].data)
                for key in page.keys():
                    keys.append(self.repo[Oid(key)].data)
        return keys
Ejemplo n.º 26
0
    parser.add_argument("-f",
                        "--finish",
                        help="full commit id to end with",
                        type=str)

    args = parser.parse_args()
    if args.path:
        # verify path existence
        if path.exists(args.path) and args.path.endswith('.git'):
            # Strip the trailing '.git' to obtain the project directory.
            proj_path = args.path[:-4]
            repo = Repository(args.path)
            if args.branch and args.branch in list(repo.branches.local):
                # Check out the requested local branch before analysing.
                branch = repo.lookup_branch(
                    list(repo.branches.local)[list(repo.branches.local).index(
                        args.branch)])
                ref = repo.lookup_reference(branch.name)
                repo.checkout(ref)
                print('Current Branch:')
                print(repo.head.shorthand)
                if args.list:
                    # Only list the commit ids in the --finish..--start range.
                    pprint.pprint(get_commit_id_list(args.finish, args.start))
                if args.start and args.finish:
                    # Run the full analysis over the requested commit range.
                    pprint.pprint(
                        git_perform_analysis(args.start, args.finish,
                                             proj_path))
            else:
                # pyprint avaliable branches
                print('Specify one of local avaliable local branches:')
                print(*list(repo.branches.local), sep="\n")
    else:
        # NOTE(review): only truthiness of --path is checked on this branch;
        # existence is tested above, and the message text contains typos.
        print('path ether not exis or it\'s not a repo')
Ejemplo n.º 27
0
conn.commit()

# Loop through the projects in the config file
for repo_name in config.sections():

    # Attempt to open an existing local repo
    try:
        local_dir = os.path.join(base_path, 'repos', repo_name + '.git')
        repo = Repository(local_dir)

        # Fetch the latest commits (equivalent to "git fetch origin")
        progress = repo.remotes["origin"].fetch()

        # Update HEAD with the new commits (equivalent to "git update-ref HEAD FETCH_HEAD")
        head = repo.head
        fetch_head = repo.lookup_reference('FETCH_HEAD')
        new_head = head.set_target(fetch_head.target)

        # Notice new branches added to the origin
        os.chdir(local_dir)
        update_result = subprocess.call(["git", "remote", "update", "origin"])

        # Prune local branches no longer present on the origin
        prune_result = subprocess.call(["git", "remote", "prune", "origin"])

        # Run git gc, to stop potential unlimited repo growth from accumulating dead objects over time
        gc_result = subprocess.call(["git", "gc"])

    except KeyError:

        # Opening a local repo failed, so we assume it's not been cloned yet.  Do the cloning now
Ejemplo n.º 28
0
class GitMiner(BaseMiner):
    """Miner that walks a local git repository via pygit2 and inserts its
    tags, commits, per-file code changes and patches into the database."""

    # Cache record: contributor id/name/login keyed by e-mail address.
    Id_Name_Login = namedtuple("Id_Name_Login", ["id", "name", "login"])
    # Cache record: identifies one file changed within one commit.
    Code_Change = namedtuple("Code_Change", ["commit_id", "filename"])

    def __init__(self, args):
        """Set up the database, in-memory caches and the pygit2 repository,
        then mine references, tags and commit ids.

        :param args: parsed CLI arguments; .dbms, .aio and .path are used here.
        """
        super().__init__(args)

        self._initialise_db()

        # SQLite does not enforce foreign keys unless explicitly enabled.
        if args.dbms == "sqlite":
            self._conn.execute("PRAGMA foreign_keys=ON")

        # Caches that avoid repeated database lookups while mining.
        self.email_map = {}        # email -> Id_Name_Login
        self.commit_id = {}        # commit oid -> database id
        self.id_commit = {}        # database id -> commit oid
        self.code_change_map = {}  # Code_Change -> code_change row id

        self.__init_user_emails()

        self._dump_repository()

        self.aio = args.aio

        # Optional asyncio event loop for asynchronous mining.
        if self.aio:
            self._create_loop()

        self.repo = Repository(args.path)
        self._fetch_references()
        self._dump_tags()
        self._fetch_commit_ids()

    def _create_loop(self):
        """Create the dedicated asyncio event loop used when aio is enabled."""
        self.loop = asyncio.new_event_loop()

    def load_from_file(self, file):
        """No-op placeholder; loading from file is not implemented."""
        pass

    def dump_to_file(self, path):
        """No-op placeholder; dumping to file is not implemented."""
        pass

    def __init_user_emails(self):
        """Prime the e-mail -> contributor cache from the contributors table."""
        rows = self.execute_query(
            """
            SELECT email, id, login, name
            FROM contributors
            WHERE email IS NOT NULL 
            """
        ).fetchall()

        for email, id_, login, name in rows:
            self.email_map[email] = self.Id_Name_Login(id=id_, name=name, login=login)

    def __init_code_change(self):
        """Prime the (commit_id, filename) -> code_change row id cache."""
        rows = self.execute_query(
            """
            SELECT id, commit_id, filename
            FROM code_change
            """
        ).fetchall()

        for pk, commit_id, filename in rows:
            self.code_change_map[self.Code_Change(commit_id=commit_id, filename=filename)] = pk

    def _dump_repository(self):
        """Ensure a repository row exists for this repo and cache its id."""
        logger.info("Dumping Repository...")

        # NOTE(review): the repo name/owner are interpolated directly into
        # the SQL text rather than bound as parameters.
        res = self.execute_query(
            f"""
            SELECT repo_id 
            FROM repository
            WHERE name="{self.repo_name}" and owner="{self.repo_owner}"
            """
        ).fetchone()

        if res:
            # Row already present: just remember its primary key.
            self._set_repo_id(res[0])
        else:
            # Gather repository metadata via RepositoryStruct and insert it.
            repo = RepositoryStruct(
                name=self.repo_name,
                owner=self.repo_owner
            ).process()

            obj = self.db_schema.repository_object(
                name=self.repo_name,
                owner=self.repo_owner,
                created_at=repo.created_at,
                updated_at=repo.updated_at,
                description=repo.description,
                disk_usage=repo.disk_usage,
                fork_count=repo.fork_count,
                url=repo.url,
                homepage_url=repo.homepage_url,
                primary_language=repo.primary_language,
                total_stargazers=repo.stargazer_count,
                total_watchers=repo.watcher_count,
                forked_from=repo.forked_from
            )

            self._insert(self.db_schema.repository.insert(), obj)
            self._set_repo_id()

    def _fetch_references(self):
        """Split repository references into tag names and branch-tip oids."""
        self.tags = []
        self.branches = {}
        for ref_name in self.repo.listall_references():
            if 'refs/tags' not in ref_name:
                # Anything that is not a tag is treated as a branch; record
                # the oid its tip currently points at.
                self.branches[ref_name] = self.repo.lookup_reference(ref_name).peel().oid
            else:
                self.tags.append(ref_name)

    def _dump_tags(self):
        """Insert one row per repository tag (annotated or lightweight)."""
        rows = []

        for tag in self.tags:
            target_obj = self.repo[self.repo.lookup_reference(tag).target.hex]

            if isinstance(target_obj, Tag):
                # Annotated tag: carries its own name, message and tagger.
                name = target_obj.name
                msg = target_obj.message
                tagged_object = target_obj.hex
                tagger = self.__get_user_id(name=target_obj.tagger.name,
                                            email=target_obj.tagger.email,
                                            oid=tagged_object,
                                            is_author=False, is_tagger=True)
            else:
                # Lightweight tag: points straight at a commit, so fall back
                # to the ref name and the commit's author.
                name = tag.split('/')[-1]
                msg = target_obj.message
                tagged_object = target_obj.hex
                tagger = self.__get_user_id(name=target_obj.author.name,
                                            email=target_obj.author.email,
                                            oid=tagged_object,
                                            is_author=True, is_tagger=False)

            rows.append(self.db_schema.tags_object(
                name=name,
                tagged_object=tagged_object,
                message=msg,
                tagger=tagger
            ))

        self._insert(object_=self.db_schema.tags.insert(), param=rows)

    @staticmethod
    def __get_status(status):
        """Map a pygit2 delta status code (1-8) to its name, else None."""
        return {
            1: 'ADDED',
            2: 'DELETED',
            3: 'MODIFIED',
            4: 'RENAMED',
            5: 'COPIED',
            6: 'IGNORED',
            7: 'UNTRACKED',
            8: 'TYPECHANGED',
        }.get(status)

    def __init_commits(self, inverse=False):
        """Load commit id mappings: oid->id, or id->oid when *inverse*."""
        if inverse:
            rows = self._conn.execute(
                f"""
                SELECT id, oid
                FROM commits
                WHERE repo_id={self.repo_id}
                """
            ).fetchall()

            for pk, oid in rows:
                self.id_commit[pk] = oid
        else:
            rows = self.execute_query(
                f"""
                SELECT oid, id
                FROM commits
                WHERE repo_id={self.repo_id}
                """
            ).fetchall()

            for oid, pk in rows:
                self.commit_id[oid] = pk

    def __get_commit_id(self, oid, pk=None):
        """Translate a commit oid to its database id, or — when *pk* is
        given — a database id back to its oid.

        :param oid: commit hash to look up (ignored when pk is given).
        :param pk: database primary key for the reverse lookup.
        :raises Exception: when *pk* is unknown even after reloading the cache.
        """
        if not pk:
            # Forward lookup; an unknown oid simply yields None.
            try:
                return self.commit_id[oid]
            except KeyError:
                return None

        try:
            return self.id_commit[pk]
        except KeyError:
            # Cache miss: reload the inverse mapping once and retry directly.
            # BUG FIX: the original retried via unbounded recursion, so a
            # truly missing pk recursed forever and the error below was
            # unreachable.
            self.__init_commits(inverse=True)
            try:
                return self.id_commit[pk]
            except KeyError:
                raise Exception(f"GitMiner => __get_commit_id: Pk {pk} does not exist!")

    def __check_user_id(self, email):
        """Resolve *email* to [id, login, name], caching database hits.

        Returns None (the fetchone miss value) when the e-mail is unknown.
        """
        try:
            hit = self.email_map[email]
        except KeyError:
            row = self.execute_query(
                f"""
                SELECT id, login, name
                FROM contributors
                WHERE email="{email}"
                """
            ).fetchone()

            if row:
                # Remember the row so subsequent lookups stay in memory.
                self.email_map[email] = self.Id_Name_Login(id=row[0], login=row[1], name=row[2])

            return row
        else:
            return [hit.id, hit.login, hit.name]

    def __update_contributor(self, name, id_, login, email):
        """Rewrite the stored name of contributor *id_* and refresh the cache."""
        # Escape embedded double quotes for the SQL string literal below.
        name = name.replace('"', '""')

        # NOTE(review): the value is interpolated into the SQL text rather
        # than bound as a parameter.
        self.execute_query(
            f"""
            UPDATE contributors
            SET name="{name}"
            WHERE id={id_}
            """
        )

        self.email_map[email] = self.Id_Name_Login(id=id_, login=login, name=name)

    def __get_user_id(self, name, email, oid, is_author, is_tagger):
        """Return the contributor id for *name*/*email*, creating the
        contributor row on demand and keeping the stored name current."""
        # Normalise empty strings to None before any lookup.
        email = email or None
        name = name or None

        known = self.__check_user_id(email)

        if known:
            # known is [id, login, name]; refresh the stored name when it
            # matches neither the recorded login nor the recorded name.
            if name not in (known[2], known[1]):
                self.__update_contributor(name=name, id_=known[0], login=known[1], email=email)
            return known[0]

        # Unknown contributor: fetch details (or fall back to an anonymous
        # row), insert them, then resolve again.
        user = CommitUserStruct(
            oid=oid,
            repo_name=self.repo_name,
            repo_owner=self.repo_owner,
            name=name,
            email=email,
            is_author=is_author,
            is_tagger=is_tagger
        ).process()

        if user is None:
            self._dump_anon_user_object(name=name, email=email, object_=self.db_schema.contributors.insert(),
                                        locked_insert=LOCKED)
        else:
            self._dump_user_object(login=None, user_object=user, object_=self.db_schema.contributors.insert(),
                                   locked_insert=LOCKED)

        return self.__get_user_id(name=name, email=email, oid=oid, is_author=is_author, is_tagger=is_tagger)

    def _dump_code_change(self, oid):
        """Insert per-file change statistics for the commit named by *oid*."""
        commit = self.repo.get(oid)
        commit_id = self.__get_commit_id(oid)

        logger.debug(f"Dumping Code Change for commit_id -> {commit_id}...")

        rows = []

        if commit:
            if commit.parents:
                diffs = [self.repo.diff(parent, commit) for parent in commit.parents]
            else:
                # Root commit: diff against git's well-known empty tree.
                diffs = [self.repo.diff("4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit)]

            total_diffs = len(diffs)
            for diff in diffs:
                logger.debug(f"Remaining: {total_diffs}")
                total_diffs -= 1
                for patch in diff:
                    rows.append(self.db_schema.code_change_object(
                        repo_id=self.repo_id,
                        commit_id=commit_id,
                        filename=patch.delta.new_file.path,
                        additions=patch.line_stats[1],
                        deletions=patch.line_stats[2],
                        changes=patch.line_stats[1] + patch.line_stats[2],
                        change_type=self.__get_status(patch.delta.status)
                    ))

            self._insert(object_=self.db_schema.code_change.insert(), param=rows)
            logger.debug(f"Successfully dumped code change for {oid}!")

    def __get_code_change_id(self, commit_id, filename):
        """Look up the primary key of a code_change row.

        :param commit_id: database id of the commit.
        :param filename: path of the file changed in that commit.
        :raises Exception: when no matching code_change entry is cached.
        """
        try:
            return self.code_change_map[self.Code_Change(commit_id=commit_id, filename=filename)]
        except KeyError:
            # BUG FIX: the original *returned* the Exception instance instead
            # of raising it, so callers received an Exception object where a
            # row id was expected.
            raise Exception(f"GitMiner => __get_code_change_id: Object does not exist! commit_id={commit_id}, "
                            f"filename:(unknown)")

    def _dump_patches(self, oid):
        """Insert the textual patch for every file changed by commit *oid*."""
        commit = self.repo.get(oid)
        commit_id = self.__get_commit_id(oid)

        logger.debug(f"Dumping Patch for commit_id -> {commit_id}...")

        rows = []

        if commit.parents:
            diffs = [self.repo.diff(parent, commit) for parent in commit.parents]
        else:
            # Root commit: diff against git's well-known empty tree.
            diffs = [self.repo.diff("4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit)]

        total_diffs = len(diffs)
        for diff in diffs:
            logger.debug(f"Remaining: {total_diffs}")
            total_diffs -= 1
            for patch in diff:
                rows.append(self.db_schema.patches_object(
                    code_change_id=self.__get_code_change_id(commit_id, patch.delta.new_file.path),
                    patch=patch.patch
                ))

        self._insert(object_=self.db_schema.patches.insert(), param=rows)
        logger.debug(f"Successfully dumped patch for {oid}!")

    def _dump_commit(self, oid):
        """Insert one commits row: aggregated diff stats, author/committer
        ids, dates, message and merge flag for the commit named by *oid*."""
        logger.debug(f"Inserting for commit: {oid}...")
        commit = self.repo.get(oid)

        if not commit.parents:
            # Root commit: diff against git's well-known empty-tree oid.
            diffs = [self.repo.diff("4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit)]
        else:
            diffs = [self.repo.diff(i, commit) for i in commit.parents]

        # Aggregate change statistics across all parent diffs.
        num_files_changed = 0
        additions, deletions = 0, 0
        for diff in diffs:
            num_files_changed += diff.stats.files_changed
            additions += diff.stats.insertions
            deletions += diff.stats.deletions

        author_name = commit.author.name
        author_email = commit.author.email
        # Only resolve a contributor id when the e-mail is non-blank.
        author_id = self.__get_user_id(name=author_name, email=author_email, oid=oid.hex, is_author=True,
                                       is_tagger=False) if \
            author_email.strip() else None
        authored_date = datetime.fromtimestamp(commit.author.time)

        committer_name = commit.committer.name
        committer_email = commit.committer.email

        # NOTE(review): this literal looks like a redacted noreply address
        # used to collapse the committer onto the author — confirm the
        # intended value.
        if committer_email == "*****@*****.**":
            committer_id = author_id
        else:
            committer_id = self.__get_user_id(name=committer_name, email=committer_email, oid=oid.hex,
                                              is_author=False, is_tagger=False) if committer_email.strip() else None

        committed_date = datetime.fromtimestamp(commit.commit_time)

        message = commit.message

        # More than one parent marks a merge commit.
        if len(commit.parents) > 1:
            is_merge = 1
        else:
            is_merge = 0

        obj = self.db_schema.commits_object(
            repo_id=self.repo_id,
            oid=oid.hex,
            additions=additions,
            deletions=deletions,
            author_id=author_id,
            authored_date=authored_date,
            committer_id=committer_id,
            committer_date=committed_date,
            message=message,
            num_files_changed=num_files_changed,
            is_merge=is_merge
        )

        self._insert(object_=self.db_schema.commits.insert(), param=obj)
        logger.debug(f"Successfully dumped commit: {oid.hex}")

    def __fetch_branch_commits(self, branch_target):
        """
        Walk a branch tip and record every commit oid not seen before.

        Stops at the first already-known commit: the walk is topological, so
        everything beyond it has been collected by another branch already.

        :param branch_target: (branch name, tip oid) pair
        """
        logger.info(f"Ongoing Branch {branch_target[0]}...")

        for commit in self.repo.walk(branch_target[1], GIT_SORT_TOPOLOGICAL | GIT_SORT_TIME):
            if commit.oid in self.commits:
                break
            self.commits.add(commit.oid)

    def _fetch_commit_ids(self):
        """
        Load the commit-oid cache from disk if present; otherwise walk every
        branch concurrently to collect the oids and persist them.
        """
        cache_path = f"{ROOT}/.gras-cache/{self.repo_name}_commits.txt"
        try:
            with open(cache_path, "rb") as fp:
                self.commits = pickle.load(fp)

            # The cache stores hex strings; rehydrate them into Oid objects.
            self.commits = [Oid(hex=x) for x in self.commits]

            logger.info(f"TOTAL COMMITS: {len(self.commits)}")
            return self.commits
        except FileNotFoundError:
            logger.error("Commits file not present, dumping...")

        self.commits = set()
        with concurrent.futures.ThreadPoolExecutor(max_workers=THREADS) as executor:
            futures = {executor.submit(self.__fetch_branch_commits, bt): bt
                       for bt in self.branches.items()}

            for future in concurrent.futures.as_completed(futures):
                bt = futures[future]
                logger.info(f"Fetched for {bt[0]}, Total: {len(self.commits)}")

        logger.info(f"TOTAL COMMITS: {len(self.commits)}")
        with open(cache_path, "wb") as fp:
            pickle.dump([oid.hex for oid in self.commits], fp)

    @timing(name="commits", is_stage=True)
    def _parse_commits(self):
        """
        Dump every commit that is not yet in the `commits` table,
        THREADS worker processes per batch.
        """
        res = self.execute_query(
            f"""
            SELECT DISTINCT oid
            FROM commits
            """
        ).fetchall()

        # Set instead of list: O(1) membership test per candidate commit.
        dumped_commits = {x[0] for x in res}
        del res

        commits = list(self.commits)
        for i in range(0, len(commits), THREADS):
            proc = [mp.Process(target=self._dump_commit, args=(oid,)) for oid in commits[i:i + THREADS] if
                    oid.hex not in dumped_commits]
            for p in proc:
                p.start()

            # BUG FIX: the original spun on `while any(p.is_alive()): continue`,
            # busy-waiting a full CPU core per batch; join() blocks properly.
            for p in proc:
                p.join()

    @timing(name="code change", is_stage=True)
    def _parse_code_change(self):
        """
        Dump code-change rows for every commit that has none yet,
        THREADS worker processes per batch.
        """
        id_oid = self.execute_query(
            f"""
            SELECT id, oid
            FROM commits
            """
        ).fetchall()

        dumped_ids = self.execute_query(
            f"""
            SELECT DISTINCT commit_id
            FROM code_change       
            """
        ).fetchall()

        # Set instead of list: O(1) membership test in the filter below.
        dumped_ids = {x[0] for x in dumped_ids}

        not_dumped_commits = [x[1] for x in id_oid if x[0] not in dumped_ids]
        del dumped_ids
        del id_oid

        for i in range(0, len(not_dumped_commits), THREADS):
            proc = [mp.Process(target=self._dump_code_change, args=(oid,)) for oid in
                    not_dumped_commits[i: i + THREADS]]
            for p in proc:
                p.start()

            # BUG FIX: blocking join() instead of the original is_alive()
            # busy-wait loop, which pegged a CPU core per batch.
            for p in proc:
                p.join()

    @timing(name="patches", is_stage=True)
    def _parse_patches(self):
        """
        Dump patch rows for every commit whose code changes exist but whose
        patches have not been stored yet, THREADS worker processes per batch.
        """
        self.__init_commits(inverse=True)

        res = self.execute_query(
            f"""
            SELECT id, commit_id
            FROM code_change
            """
        ).fetchall()

        # Map code_change id -> owning commit id.
        cc_commit = {row[0]: row[1] for row in res}

        res = self.execute_query(
            """
            SELECT code_change_id
            FROM patches
            """
        )

        not_dumped_commits = set(cc_commit.values()).difference({cc_commit[x[0]] for x in res})
        not_dumped_commits = sorted([self.id_commit[id_] for id_ in not_dumped_commits])

        del cc_commit

        for i in range(0, len(not_dumped_commits), THREADS):
            # BUG FIX: this stage previously spawned _dump_code_change again;
            # the patches stage must run _dump_patches.
            proc = [mp.Process(target=self._dump_patches, args=(oid,)) for oid in
                    not_dumped_commits[i: i + THREADS]]
            for p in proc:
                p.start()

            # Blocking join() instead of the original is_alive() busy-wait.
            for p in proc:
                p.join()

    @timing(name="async -> commits", is_stage=True)
    async def _async_parse_commits(self):
        """Dump all known commits concurrently on the instance executor."""
        loop = asyncio.get_event_loop()
        futures = [loop.run_in_executor(self.executor, self._dump_commit, oid)
                   for oid in self.commits]
        done, _ = await asyncio.wait(futures)
        for fut in done:
            logger.info(f"Dumped commit: {fut.result()}")

    @timing(name="async -> code change", is_stage=True)
    async def _async_parse_code_change(self):
        """Dump code changes for all known commits concurrently."""
        loop = asyncio.get_event_loop()
        futures = [loop.run_in_executor(self.executor, self._dump_code_change, oid)
                   for oid in self.commits]
        done, _ = await asyncio.wait(futures)
        for fut in done:
            logger.info(f"Dumped Code Change for commit: {fut.result()}")

    def process(self):
        """Run the dump pipeline, async or synchronous depending on `aio`."""
        if not self.aio:
            # self._parse_commits()
            self.__init_commits()
            self._parse_code_change()
        else:
            self.loop.run_until_complete(self._parse_commits())
            self.loop.run_until_complete(self._parse_code_change())

        # self._parse_patches()

    def __del__(self):
        """Close the event loop when this miner was created in async mode."""
        if not self.aio:
            return
        self.loop.close()
Ejemplo n.º 29
0
class GitStorage(BaseStorage):
    """Read-only storage backend over a git repository, driven by pygit2.

    All content access goes through the commit selected by :meth:`checkout`
    (HEAD by default).  An empty repository is represented by a ``None``
    commit plus special dict markers for the root.
    """

    _backend = None

    def __init__(self, context, repo_path=None):
        self.context = context
        rp = IStorageInfo(context).path

        try:
            self.repo = Repository(discover_repository(rp))
        except KeyError:
            # discover_repository may have failed.
            raise PathNotFoundError('repository does not exist at path')

        self.checkout()  # defaults to HEAD.

    @property
    def empty_root(self):
        # Marker mapping returned when the repository has no commits yet.
        return {'': '_empty_root'}

    def _get_empty_root(self):
        return self.empty_root

    def _get_obj(self, path, cls=None):
        """Resolve *path* within the current commit's tree.

        Returns a pygit2 Tree or Blob, or a marker dict (empty root /
        submodule).  Raises PathNotFoundError — or the more specific
        PathNotDirError / PathNotFileError when *cls* is given — on failure.
        """
        if path == '' and self._commit is None:
            # special case
            return self._get_empty_root()

        if self._commit is None:
            raise PathNotFoundError('repository is empty')

        root = self._commit.tree
        try:
            breadcrumbs = []
            fragments = list(reversed(path.split('/')))
            node = root
            oid = None
            while fragments:
                fragment = fragments.pop()
                if not fragment == '':
                    # no empty string entries, also skips over '//' and
                    # leaves the final node (if directory) as the tree.
                    oid = node[fragment].oid
                    node = self.repo.get(oid)
                breadcrumbs.append(fragment)
                if node is None:
                    # strange.  Looks like it's either submodules only
                    # have entry nodes or pygit2 doesn't fully support
                    # this.  Try to manually resolve the .gitmodules
                    # file.
                    if cls is None:
                        # Only return this if a specific type was not
                        # expected.
                        submods = parse_gitmodules(self.repo.get(
                            root[GIT_MODULE_FILE].oid).data)
                        submod = submods.get('/'.join(breadcrumbs))
                        if submod:
                            fragments.reverse()
                            return {
                                '': '_subrepo',
                                'location': submod,
                                'path': '/'.join(fragments),
                                'rev': oid.hex,
                            }

            if node and (cls is None or isinstance(node, cls)):
                return node
        except KeyError:
            # can't find what is needed in repo, raised by pygit2
            raise PathNotFoundError('path not found')

        # not what we were looking for.
        if cls == Tree:
            raise PathNotDirError('path not dir')
        elif cls == Blob:
            raise PathNotFileError('path not file')
        raise PathNotFoundError('path not found')

    @property
    def _commit(self):
        # The commit object selected by checkout(); None for an empty repo.
        return self.__commit

    @property
    def rev(self):
        # Full hex of the current commit, or None for an empty repo.
        if self.__commit:
            return self.__commit.hex
        return None

    @property
    def shortrev(self):
        # TODO this is an interim solution.
        if self.rev:
            return self.rev[:12]

    def basename(self, name):
        return name.split('/')[-1]

    def checkout(self, rev=None):
        """Select the commit that subsequent reads resolve against."""
        # None maps to the default revision.
        if rev is None:
            rev = 'HEAD'

        try:
            self.__commit = self.repo.revparse_single(rev)
        except KeyError:
            if rev == 'HEAD':
                # probably a new repo.
                self.__commit = None
                return
            raise RevisionNotFoundError('revision %s not found' % rev)
            # otherwise a RevisionNotFoundError should be raised.

    def files(self):
        """Return a flat list of all file paths in the current commit."""
        def _files(tree, current_path=None):
            results = []
            for node in tree:
                if current_path:
                    name = '/'.join([current_path, node.name])
                else:
                    name = node.name

                obj = self.repo.get(node.oid)
                if isinstance(obj, Blob):
                    results.append(name)
                elif isinstance(obj, Tree):
                    results.extend(_files(obj, name))
            return results

        if not self._commit:
            return []
        results = _files(self._commit.tree)
        return results

    def file(self, path):
        """Return the raw bytes of the file at *path*."""
        return self._get_obj(path, Blob).data

    def listdir(self, path):
        """Return the entry names of the directory at *path*."""
        if path:
            tree = self._get_obj(path, Tree)
        else:
            if self._commit is None:
                return []
            tree = self._commit.tree

        return [entry.name for entry in tree]

    def format(self, **kw):
        # XXX backwards compatibility??
        return kw

    def log(self, start, count, branch=None, shortlog=False):
        """
        start and branch are literally the same thing.
        """

        def _log(iterator):
            for pos, commit in iterator:
                if pos == count:
                    # BUG FIX (PEP 479): raising StopIteration inside a
                    # generator becomes RuntimeError on Python 3.7+;
                    # `return` is the correct way to end it.
                    return
                yield {
                    'author': commit.committer.name,
                    # BUG FIX: previously read self._commit.committer.email,
                    # i.e. the checked-out commit's email for every entry.
                    'email': commit.committer.email,
                    'date': self.strftime(committer_dt(commit.committer)),
                    'node': commit.hex,
                    'rev': commit.hex,
                    'desc': commit.message
                }

        if start is None:
            # assumption.
            start = 'HEAD'
            try:
                self.repo.revparse_single(start)
            except KeyError:
                return []

        try:
            rev = self.repo.revparse_single(start).hex
        except KeyError:
            raise RevisionNotFoundError('revision %s not found' % start)

        iterator = enumerate(self.repo.walk(rev, GIT_SORT_TIME))

        return list(_log(iterator))

    def pathinfo(self, path):
        """Return a metadata dict (type/basename/size/date) for *path*."""
        obj = self._get_obj(path)
        if isinstance(obj, Blob):
            return self.format(**{
                'type': 'file',
                'basename': self.basename(path),
                'size': obj.size,
                'date': self.strftime(committer_dt(self._commit.committer)),
            })
        elif isinstance(obj, dict):
            # special cases are represented as dict.
            if obj[''] == '_subrepo':
                return self.format(**{
                    'type': 'subrepo',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                    # extra field.
                    'obj': obj,
                })

            elif obj[''] == '_empty_root':
                return self.format(**{
                    'type': 'folder',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                })

        # Assume this is a Tree.
        return self.format(**{
            'basename': self.basename(path),
            'size': 0,
            'type': 'folder',
            'date': '',
        })

    def branches(self):
        """Return ((name, tip hex), ...) for every branch."""
        return tuple(
            (b, self.repo.lookup_branch(b).target.hex)
            for b in self.repo.listall_branches()
        )

    def tags(self):
        """Return ((tag name, target hex), ...) for every tag ref."""
        return tuple(
            (b[10:], self.repo.lookup_reference(b).target.hex)
            for b in self.repo.listall_references()
            if b.startswith('refs/tags')
        )
Ejemplo n.º 30
0
class GitRepo(object):

    ''' git repo class '''

    def __init__(self, path):
        try:
            self.__repo = Repository(path)
        except Exception as e:
            # Keep the instance usable; every accessor guards on None.
            self.__repo = None
            print(e)

    def get_info(self):
        ''' return basic repo metadata and the default signature w/ json '''
        if not self.__repo:
            return None
        signature = self.__repo.default_signature
        result = {
            'path': self.__repo.path,
            'workdir': self.__repo.workdir,
            'bare': self.__repo.is_bare,
            'empty': self.__repo.is_empty,
            'name': signature.name,
            'email': signature.email,
            'time': signature.time,
            'offset': signature.offset,
        }
        return result

    def get_all_references(self):
        ''' list all reference names '''
        return self.__repo.listall_references()

    def get_reference(self, name):
        ''' look up a reference by full name; None when missing '''
        if not self.__repo:
            return None
        ref = None
        try:
            ref = self.__repo.lookup_reference(name)
        except Exception as e:
            print(e)
        return ref

    def get_all_branches(self, branch_type=None):
        ''' list branch names, optionally restricted to a branch type '''
        if not self.__repo:
            return None
        if branch_type:
            return self.__repo.listall_branches(branch_type)
        r = self.__repo.listall_branches(GIT_BRANCH_LOCAL | GIT_BRANCH_REMOTE)
        return r

    def get_branch(self, name, branch_type=GIT_BRANCH_LOCAL):
        ''' look up a branch object by name '''
        if not self.__repo:
            return None
        return self.__repo.lookup_branch(name, branch_type)

    def check_branch(self, name, branch_type=None):
        ''' look up a branch, inferring remote vs local from the name '''
        if not branch_type:
            # Remote branches are qualified as "<remote>/<branch>".
            if '/' in name:
                branch_type = GIT_BRANCH_REMOTE
            else:
                branch_type = GIT_BRANCH_LOCAL
        try:
            result = self.get_branch(name, branch_type)
            return result
        except Exception as e:
            print(e)
            return False

    def get_current_commit(self):
        ''' return the HEAD commit w/ json '''
        if not self.__repo:
            return None
        commit = self.__repo.revparse_single('HEAD')
        return self.get_commit(commit)

    def get_commit_by_branch(self, branch):
        ''' return the tip commit of a branch w/ json '''
        if not self.__repo:
            return None
        query = 'refs/'
        if hasattr(branch, 'remote_name'):
            query += 'remotes/'
        else:
            query += 'heads/'
        query += branch.branch_name
        try:
            ref = self.get_reference(query)
            commit = ref.target
            return self.get_commit(commit)
        except Exception as e:
            print(e)
            return None

    def get_commit_by_tag(self, tag):
        ''' return the commit a tag object points at w/ json '''
        if self.__repo is None:
            return None
        if tag:
            commit = tag.get_object()
            return self.get_commit(commit)
        return None

    def get_commit(self, oid_or_commit):
        ''' return a commit w/ json '''
        if not self.__repo or not oid_or_commit:
            return None
        try:
            commit = oid_or_commit
            if not isinstance(oid_or_commit, Commit):
                commit = self.__repo.get(oid_or_commit)
            if commit and commit.type == GIT_OBJ_COMMIT:
                result = {
                    'id': str(commit.id),
                    'author': commit.author.name,
                    'commiter': commit.committer.name,
                    'message': commit.message,
                    'message_encoding': commit.message_encoding,
                    'tree': str(commit.tree_id),
                    'parent': [str(pid) for pid in commit.parent_ids],
                    'time': str(commit.commit_time),
                    'time_offset': str(commit.commit_time_offset),
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_commits(self, depth=10, oid_or_commit=None):
        ''' return up to *depth* commit dicts starting from *oid_or_commit*
        (HEAD when omitted), following parents depth-first '''
        result = []
        if depth == 0:
            return result
        if oid_or_commit:
            commit = self.get_commit(oid_or_commit)
        else:
            commit = self.get_current_commit()
        if not commit:
            return result
        # TODO: starting from a commit or its parent
        # TODO: author
        result.append(commit)
        depth -= 1
        if commit and commit['parent']:
            for parent in commit['parent']:
                result.extend(self.get_commits(depth, parent))
        return result

    def get_commits_by_branch(self, name, path=None):
        ''' return {position: commit} for a local branch, keeping only
        commits that touch *path* (when given) '''
        if not self.__repo:
            return None
        if self.check_branch(name):
            ref = self.get_reference('refs/heads/' + name)
            if ref:
                # BUG FIX: get_commits(commit) bound the oid to the `depth`
                # parameter and then called .items() on the returned list,
                # which always raised AttributeError.
                commits = self.get_commits(oid_or_commit=ref.target)
                result = {}
                for key, val in enumerate(commits):
                    if self.check_commit_by_path(val, path):
                        result[key] = val
                return result
        return None

    def check_tag(self, name):
        ''' return the tag reference, or False when it does not exist '''
        try:
            ref = self.get_reference('refs/tags/' + name)
            return ref
        except Exception:
            return False

    def get_commits_by_tag(self, tag, path=None):
        ''' return {position: commit} reachable from a tag, keeping only
        commits that touch *path* (when given) '''
        if not self.__repo:
            return None
        if tag:
            # BUG FIX: same depth/.items() confusion as get_commits_by_branch.
            commits = self.get_commits(oid_or_commit=tag.target)
            result = {}
            for key, val in enumerate(commits):
                if self.check_commit_by_path(val, path):
                    result[key] = val
            return result
        return None

    def check_commit_by_path(self, commit, path):
        ''' check whether *path* exists in the commit's tree '''
        if not commit:
            return False
        if path is None or len(path) == 0:
            return True
        result = self.get_tree(commit['tree'])

        if not isinstance(path, list):
            path = path.strip().split('/')

        for name in path:
            name = name.strip()
            if name in result:
                # BUG FIX: get_tree() stores {'id': ...} item dicts; the oid
                # must be pulled out of the entry (cf. get_tree_by_commit).
                oid = result[name]['id']
                result = self.get_tree(oid)

                if not result:
                    result = self.get_blob(oid)
        return result is not None

    def get_tree(self, oid, ppath=None):
        ''' return a tree w/ json: {entry name: {id, type, ppath}} '''
        if not self.__repo:
            return None
        try:
            tree = self.__repo.get(oid)
            if tree and tree.type == GIT_OBJ_TREE:
                result = {}
                for entry in tree:
                    item = {
                        'id': str(entry.id)
                    }
                    obj = self.__repo.get(entry.id)
                    if obj.type == GIT_OBJ_BLOB:
                        item['type'] = 'blob'
                    elif obj.type == GIT_OBJ_TREE:
                        item['type'] = 'tree'
                    item['ppath'] = ppath
                    result[entry.name] = item
                return result
        except Exception as e:
            print(e)
        return None

    def get_tree_by_commit(self, commit, path=None):
        ''' descend from a commit's root tree along *path* (a list of
        entry names) and return the final tree w/ json '''
        if not commit:
            return None
        result = self.get_tree(commit['tree'])
        if not path:
            return result

        try:
            for name in path:
                oid = result[name]['id']
                p = result[name]['ppath']
                p = name if not p else p + '/' + name
                result = self.get_tree(oid, p)
                if not result:
                    break
        except Exception as e:
            print(e)
            result = None
        return result

    def get_current_root(self):
        ''' return the root tree of HEAD w/ json '''
        tree = self.get_current_commit()
        if tree:
            return self.get_tree(tree['tree'])
        return None

    def get_whole_tree(self, oid):
        ''' tree w/ json '''
        if not self.__repo:
            return None
        result = tree_walker(self.__repo, oid)
        return result

    def get_blob(self, oid):
        ''' blob w/ json '''
        if not self.__repo or not oid:
            return None
        try:
            blob = self.__repo.get(oid)
            if blob and blob.type == GIT_OBJ_BLOB:
                # Binary blobs get content=None; text is decoded leniently.
                content = blob.is_binary and None or blob.data.decode(
                    'utf8', 'ignore')
                result = {
                    'id': str(blob.id),
                    'content': content,
                    'size': blob.size,
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_blob_by_commit(self, commit, path=None):
        ''' resolve *path* (a list of names) under a commit and return the
        final component as a blob w/ json '''
        try:
            tree = self.get_tree_by_commit(commit, path[:-1])
            oid = tree[path[-1]]['id']
            result = self.get_blob(oid)
            return result
        except Exception as e:
            print(e)
            return None

    def get_tag(self, oid):
        ''' tag w/ json '''
        if not self.__repo or not oid:
            return None
        try:
            tag = self.__repo.get(oid)
            if tag and tag.type == GIT_OBJ_TAG:
                result = {
                    'id': str(oid),
                    'name': tag.name,
                    'target': str(tag.target.id),
                    'tagger': tag.tagger,
                    'message': tag.message,
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_patches(self, a=None, b=None):
        ''' diff two revisions (default HEAD vs HEAD^) and return the
        patches with their hunks w/ json '''
        try:
            if not a:
                a = 'HEAD'
            if not b:
                b = a + '^'
            t1 = self.__repo.revparse_single(a)
            t2 = self.__repo.revparse_single(b)
            patches = self.__repo.diff(t1, t2)
            result = []
            for patch in patches:
                p = {
                    'old_file_path': patch.old_file_path,
                    'new_file_path': patch.new_file_path,
                    'old_oid': str(patch.old_oid),
                    'new_oid': str(patch.new_oid),
                    'status': patch.status,
                    'similarity': patch.similarity,
                    'additions': patch.additions,
                    'deletions': patch.deletions,
                    'binary': patch.is_binary,
                    'hunks': [],
                }
                for hunk in patch.hunks:
                    h = {
                        'old_start': hunk.old_start,
                        'old_lines': hunk.old_lines,
                        'new_start': hunk.new_start,
                        'new_lines': hunk.new_lines,
                        'lines': hunk.lines,
                    }
                    p['hunks'].append(h)
                result.append(p)
            return result
        except Exception as e:
            print(e)
        return None
Ejemplo n.º 31
0
class ChangeCtxDefault(ChangeCtx):
    """Class with the specific implementation details for the change context
    of the default revision state of the repository. It inherits the common
    implementation from the class :class:`ChangeCtxBase`.
    """

    def __init__(self, repo_path):
        self._repo_path = repo_path
        self._repo = Repository(self._repo_path)
        self._ctx = self._repo[self.revision_id]

    @locked_cached_property
    def files(self):
        collected = []

        def visit(repo, tree, prefix=None):
            # Depth-first walk collecting blob paths; anything that is
            # neither a tree nor a blob is unexpected here.
            for entry in tree:
                obj = repo[entry.oid]
                filename = (prefix + '/' + entry.name) if prefix else entry.name
                if obj.type == GIT_OBJ_TREE:
                    visit(repo, obj, filename)
                elif obj.type == GIT_OBJ_BLOB:
                    collected.append(filename)
                else:
                    raise RuntimeError('Invalid object: %s' % filename)

        visit(self._repo, self._ctx.tree)
        return sorted(collected)

    @locked_cached_property
    def revision_id(self):
        """This property should be cached because the lookup_reference method
        reloads itself.
        """
        try:
            master = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            raise RuntimeError('Branch "master" not found!')
        return master.target

    def needs_reload(self):
        try:
            master = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            return True
        return self.revision_id != master.target

    def filectx_needs_reload(self, filectx):
        try:
            master = self._repo.lookup_reference('refs/heads/master')
        except Exception:
            raise RuntimeError('Branch "master" not found!')
        return filectx._changectx.oid != master.target

    def published(self, date, now):
        return date <= now

    def etag(self, filectx):
        checksum = adler32(filectx.path) & 0xffffffff
        return 'blohg-%i-%i-%s' % (filectx.mdate or filectx.date,
                                   len(filectx.data), checksum)

    def get_filectx(self, path):
        return FileCtx(self._repo, self._ctx, path)
Ejemplo n.º 32
0
Archivo: doc.py Proyecto: obmarg/resumr
class Document(object):
    '''
    Class representing a document, interacts with the git
    database
    '''

    def __init__(self, name, create=False, rootPath=None):
        '''
        Constructor

        Args:
            name        The name of the document
            create      If true, will create a document
            rootPath    The rootPath to use (if not supplied, uses default)
        Exceptions:
            RepoNotFound if repository isn't found
        '''
        base = rootPath if rootPath else DEFAULT_ROOT_PATH
        targetDir = os.path.join(base, name + '.git')
        if create:
            # A document is stored as a bare repository.
            self.repo = init_repository(targetDir, True)
            self._CreateMasterBranch()
        else:
            try:
                self.repo = Repository(targetDir)
            except KeyError:
                raise RepoNotFound()

    def _CreateMasterBranch(self):
        '''
        Creates the master branch on the repo w/ default file.
        For now this is just a file named layout
        '''
        initialCommit = CommitBlob(
            self.repo, '', SECTION_INDEX_FILENAME, 'Initial commit'
        )
        self.repo.create_reference(MASTER_REF, initialCommit)

    @staticmethod
    def _IsSectionRef(refName):
        '''
        Checks if a refererence name refers to a section

        Args:
            refName:    The reference name
        Returns:
            A boolean
        '''
        return refName.startswith(SECTION_REF_PREFIX)

    @staticmethod
    def _RefNameToSectionName(refName):
        '''
        Converts a reference name to a section name

        Args:
            ref:    The reference name
        '''
        return refName[len(SECTION_REF_PREFIX):]

    def _SectionRefs(self):
        '''
        Gets an iterator over the section refs
        '''
        for refName in self.repo.listall_references():
            if self._IsSectionRef(refName):
                yield (
                    self._RefNameToSectionName(refName),
                    self.repo.lookup_reference(refName),
                )

    def Sections(self):
        '''
        Gets an iterator over all the sections
        '''
        for sectionName, ref in self._SectionRefs():
            yield Section(sectionName, self.repo[ref.oid], self.repo)

    def CurrentSections(self):
        '''
        Gets the current sections with their positions

        Returns:
            A list of tuples ( position, section )
        '''
        return enumerate(self._CurrentSections())

    def _CurrentSections(self):
        '''
        Internal method to get the current sections
        in order, without position numbers

        Returns:
            An iterator over the sections
        '''
        for entry in SectionIndex(self.repo).CurrentSections():
            yield self.FindSection(entry.name)

    def FindSection(self, name):
        '''
        Finds a section by name

        Args:
            name    The name of the section to find
        Returns:
            The section if found
        Exceptions:
            SectionNotFound if section not found
        '''
        try:
            ref = self.repo.lookup_reference(SECTION_REF_PREFIX + name)
        except KeyError:
            raise SectionNotFound()
        return Section(name, self.repo[ref.oid], self.repo)

    def AddSection(self, name, content=''):
        '''
        Creates a new section

        Args:
            name        The name of the section
            content     The optional initial content of the
                        section

        Returns:
            The new Section object
        '''
        # TODO: Should probably make
        #       sure no such section exists
        newCommit = CommitBlob(
            self.repo,
            content,
            name,
            'Created section ' + name
        )
        ref = self.repo.create_reference(
            SECTION_REF_PREFIX + name,
            newCommit
        )
        index = SectionIndex(self.repo)
        index.AddSection(name)
        index.Save(self.repo)
        return Section(name, self.repo[ref.oid], self.repo)

    def RemoveSection(self, name):
        '''
        Removes a section.
        This function does not actually delete the data associated
        with a section, it just removes it from the index.

        Args:
            name    The name of the section to remove
        '''
        index = SectionIndex(self.repo)
        index.RemoveSection(name)
        index.Save(self.repo)
Ejemplo n.º 33
0
def get_default_branch(repo_path):
    """Return the target of HEAD for the repository at *repo_path*."""
    head_ref = Repository(repo_path).lookup_reference('HEAD')
    return head_ref.target
Ejemplo n.º 34
0
def get_working_copy_tag(repo: pygit2.Repository) -> pygit2.Reference:
    """Look up and return the working-copy tag reference of *repo*."""
    working_copy_ref = repo.lookup_reference(WORKING_COPY_TAG_REF)
    return working_copy_ref