Example #1
def sync(repo: pygit2.Repository, branch_name: str) -> None:
    """
    Tries to update the `branch_name` branch of the `repo` repo to the latest
    upstream branch state.
    If the branch is up to date, does nothing.
    If the branch can be fast-forwarded, resets to the upstream.
    Otherwise, fails with an error.
    """
    branch = repo.branches.local[branch_name]
    if not branch.is_head():
        raise ValueError(branch)

    try:
        remote = repo.remotes['origin']
    except KeyError:
        return

    remote.fetch(callbacks=pygit2.RemoteCallbacks())
    upstream_branch = branch.upstream
    if not upstream_branch:
        return

    merge_state, _ = repo.merge_analysis(upstream_branch.target, branch.name)
    if merge_state & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE:
        return
    if not (merge_state & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD):
        raise ValueError(branch)

    repo.reset(upstream_branch.target, pygit2.GIT_RESET_HARD)
    repo.checkout(refname=branch)
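
# A minimal usage sketch for sync(), assuming an existing local clone whose
# checked-out branch tracks an upstream; the path below is hypothetical.
import pygit2

repo = pygit2.Repository(pygit2.discover_repository('/path/to/clone'))
sync(repo, repo.head.shorthand)  # fast-forward the currently checked-out branch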
Example #2
    def launch(self):
        # init repo
        self._info('Locating the repository..')
        try:
            repo = Repository(REPO_PATH)
        except GitError:
            self._error('Failed to locate the repository!')
            return

        # fetch
        self._info('Fetching repository..')
        try:
            repo.remotes['origin'].fetch()
        except GitError:
            self._error('Failed to fetch the repository!')

        # reset repository
        self._info('Resetting the repository..')
        try:
            repo.reset(
                repo.lookup_reference('refs/remotes/origin/master').target, GIT_RESET_HARD)
        except GitError:
            self._error('Failed to reset the repository!')

        # launch
        self._info('Launching..')
        try:
            subprocess.Popen(EXE_PATH, cwd=EXE_WORKDIR)
        except OSError:
            self._error('Failed to launch!')
        else:
            self.destroy()
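
# The _info()/_error() helpers, destroy(), and the REPO_PATH/EXE_PATH/
# EXE_WORKDIR constants come from the surrounding (unshown) launcher class.
# A minimal stand-in harness, with hypothetical values, might look like:
import subprocess
from pygit2 import Repository, GitError, GIT_RESET_HARD

REPO_PATH = '/opt/app/repo'      # hypothetical
EXE_PATH = '/opt/app/bin/app'    # hypothetical
EXE_WORKDIR = '/opt/app'         # hypothetical

class Launcher:
    # launch() from above would be defined here.

    def _info(self, message):
        print('[info] {}'.format(message))

    def _error(self, message):
        print('[error] {}'.format(message))

    def destroy(self):
        # In the original this presumably closes the launcher window.
        pass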
Example #3
config = repo.config

remote_url = repo.remotes[args.remote].url
pass_path = None
for glob, mapping in credentials_mapping.items():
    if fnmatch.fnmatch(remote_url, glob):
        pass_path = mapping["target"]

# FIXME: user identity (name + email) is not always set at repo level
# that said, we need a SPOT for git identities as used/implemented
# in git-identity emacs package
source_branch_name = (args.update_source_branch
                      if args.update_source_branch != ""
                      else get_active_branch(repo))
remote = resolve_remote(repo, args.remote)
if not remote:
    log_error(f"cannot find remote '{args.remote}'")
    sys.exit(1)
if args.update_op == "fetch":
    remote.fetch(refspecs=["refs/heads/*:refs/heads/*"])
elif args.update_op == "merge":
    source_branch_head = repo.references[source_branch_name].resolve().target
    repo.merge(source_branch_head)
elif args.update_op == "rebase":
    source_branch = repo.lookup_branch(source_branch_name, GIT_BRANCH_REMOTE)
    dest_branch = repo.lookup_branch(get_active_branch(repo))
    dest_branch.set_target(source_branch.target)
    # Fast-forwarding with set_target() leaves the index and the working tree
    # in their old state. That's why we need to checkout() and reset()
    repo.checkout(f"refs/heads/{dest_branch.name}")
    repo.reset(dest_branch.target, GIT_RESET_HARD)
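
# get_active_branch() and resolve_remote() are helpers defined elsewhere in
# the original script; plausible minimal versions, assuming pygit2, could be:
def get_active_branch(repo):
    # Short name of the branch HEAD points to, e.g. "master".
    return repo.head.shorthand

def resolve_remote(repo, name):
    # Return the named remote, or None if it is not configured.
    try:
        return repo.remotes[name]
    except KeyError:
        return None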
Example #4
def post(event, context):
    # Logging the entire event is a cheap, simple way to make debugging easier.
    # Often, just being able to see the event information quickly can help
    # troubleshoot an issue faster than hooking up a debugger.
    logger.info(event)

    # We always want to take the shortest path through our functions. Check for anything fatal first.
    try:
        output_bucket = os.environ['output_bucket']
    except KeyError:
        raise Exception(
            'Output Bucket not defined. Set the environment variable for the function'
        )

    try:
        comment_function = os.environ['comment_function']
    except KeyError:
        raise Exception(
            'Comment Function not defined. Set the environment variable for the function'
        )

    # Here we take a few steps to get the JSON into the body object.
    # Whether this came in as a proxy request, a direct API Gateway request,
    # or a boto3 invocation, the format of the body could be a few different
    # types. With this stepped approach we can guarantee that no matter how
    # this was called, we will have JSON in the body variable.
    if "body" in event:
        body = json.loads(event['body'])
    else:
        try:
            body = json.loads(event)
        except (TypeError, ValueError):
            body = event

    # We will still validate this before doing anything with it, but if we are missing
    # any essential components we should end early to save processing time.
    # No point in computing hashes for a payload that is missing data we need.
    try:
        full_name = body['repository']['full_name']
    except KeyError:
        raise Exception('Failed to find full_name in json post body')

    try:
        remote_url = body['repository']['clone_url']
    except KeyError:
        raise Exception('Failed to find clone_url in json post body')

    # Another short circuit. If we know this wasn't called locally then it was likely
    # called via the webhook or some HTTP entity, so we need to see what kind of event
    # it is and process it appropriately. Otherwise we can save ourselves a bunch of validation.
    if "local_invoke" not in body:

        try:
            github_secrets = os.environ['github_secrets']
        except KeyError:
            raise Exception(
                'Github secrets not defined. Set the environment variable for the function'
            )

        if "headers" in event and "X-GitHub-Event" in event['headers']:
            # We only care about push events; if this isn't one, politely exit
            if event['headers']['X-GitHub-Event'] != "push":
                return {
                    "statusCode": 200,
                    "body": json.dumps('Skipping - Not a push event')
                }

        # We split this env variable because we could be re-using this function for multiple
        # API endpoints, multiple repos, etc. It is best practice to have a secret per repo,
        # so even with this exact endpoint we can still feed it multiple repos with multiple
        # keys. The keys are separated by commas.
        apikeys = github_secrets.split(',')

        # set a validation key, we will check multiple keys so it holds our result
        secure = False

        # Compute out the hash and validate the signature. If it passes set secure, otherwise throw an error
        if 'X-Hub-Signature' in event['headers'].keys():
            signature = event['headers']['X-Hub-Signature']
            for k in apikeys:
                computed_hash = hmac.new(k.encode('ascii'),
                                         event['body'].encode('ascii'),
                                         hashlib.sha1)
                computed_signature = '='.join(
                    ['sha1', computed_hash.hexdigest()])
                if hmac.compare_digest(computed_signature.encode('ascii'),
                                       signature.encode('ascii')):
                    secure = True
        if not secure:
            raise Exception(
                'Failed to validate authenticity of webhook message')

    # branch_name (like build_path, reset, and cleanup below) is defined
    # elsewhere in the original module; it is not shown in this excerpt.
    repo_name = full_name + '/branch/' + branch_name
    repo_path = '/tmp/%s' % repo_name

    # If we have an existing repo (if this function is still warm / is not a cold start)
    # we can re-use that repo on the file system and update it to save us some time and bandwidth
    try:
        repository_path = discover_repository(repo_path)
        repo = Repository(repository_path)
        logger.info('found existing repo, using that...')
    # If a previous repo is not found we will create it
    except Exception:
        logger.info('creating new repo for %s in %s' % (remote_url, repo_path))
        repo = create_repo(repo_path, remote_url)

    # Re-used or created, we now have a repo reference to pull against
    pull_repo(repo, branch_name, remote_url)

    # Now that we have the raw markdown files we can inject our comments
    # Into the markdown files before we compile the site so we take advantage
    # of all of the theme styling with minimal effort
    add_comments(repo_path + "/content/posts/", comment_function)

    # Compile the site to our pre-defined path
    build_hugo(repo_path, build_path)

    # Sync the site to our public s3 bucket for hosting
    upload_to_s3(build_path, output_bucket)

    if reset:
        logger.info('Resetting Repo...')
        repo.reset(repo.head.target, GIT_RESET_HARD)

    if cleanup:
        logger.info('Cleanup Lambda container...')
        shutil.rmtree(repo_path)

    # We have to return a status code, otherwise the API Gateway will give a server error.
    # However, we are likely exceeding the 29s hard timeout limit on the API Gateway.
    # If we can return correctly we should attempt to: that window could be changed
    # later, and we may occasionally execute in time.
    return {
        "statusCode": 200,
        "body": json.dumps('Successfully updated %s' % repo_name)
    }
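
# create_repo() and pull_repo() are helpers defined elsewhere in the original
# module; a plausible minimal sketch using pygit2 might look as follows
# (the imports also cover the pygit2 names used by post() above).
from pygit2 import Repository, clone_repository, discover_repository, GIT_RESET_HARD

def create_repo(repo_path, remote_url):
    # Fresh clone into the Lambda's /tmp scratch space.
    return clone_repository(remote_url, repo_path)

def pull_repo(repo, branch_name, remote_url):
    # Fetch and hard-reset the warm repository to the remote branch tip.
    repo.remotes['origin'].fetch()
    remote_ref = 'refs/remotes/origin/%s' % branch_name
    repo.reset(repo.lookup_reference(remote_ref).target, GIT_RESET_HARD)
    return repo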
Example #5
class GitRepo:
    """A class that manages a git repository.

    This class enables versiong via git for a repository.
    You can stage and commit files and checkout different commits of the repository.
    """

    path = ''
    pathspec = []
    repo = None
    callback = None
    author_name = 'QuitStore'
    author_email = '*****@*****.**'
    gcProcess = None

    def __init__(self, path, origin=None, gc=False):
        """Initialize a new repository from an existing directory.

        Args:
            path: A string containing the path to the repository.
            origin: The remote URL to clone from, fetch from, and push to.
        """
        logger = logging.getLogger('quit.core.GitRepo')
        logger.debug('GitRepo, init, Create an instance of GitStore')
        self.path = path
        self.gc = gc

        if not exists(path):
            try:
                makedirs(path)
            except OSError as e:
                raise Exception("Can't create path in filesystem: {} ({})".format(path, e))

        try:
            self.repo = Repository(path)
        except (KeyError, AttributeError):
            # Not an existing repository; it is cloned or initialized below.
            pass

        if origin:
            self.callback = QuitRemoteCallbacks()

        if self.repo:
            if self.repo.is_bare:
                raise QuitGitRepoError('Bare repositories not supported, yet')

            if origin:
                # set remote
                self.addRemote('origin', origin)
        else:
            if origin:
                # clone
                self.repo = self.cloneRepository(origin, path, self.callback)
            else:
                self.repo = init_repository(path=path, bare=False)

    def cloneRepository(self, origin, path, callback):
        try:
            repo = clone_repository(url=origin,
                                    path=path,
                                    bare=False,
                                    callbacks=callback)
            return repo
        except Exception as e:
            raise QuitGitRepoError(
                "Could not clone from: {} origin. {}".format(origin, e))

    def addall(self):
        """Add all (newly created|changed) files to index."""
        self.repo.index.read()
        self.repo.index.add_all(self.pathspec)
        self.repo.index.write()

    def addfile(self, filename):
        """Add a file to the index.

        Args:
            filename: A string containing the path to the file.
        """
        index = self.repo.index
        index.read()

        try:
            index.add(filename)
            index.write()
        except Exception as e:
            logger.info(
                "GitRepo, addfile, Could not add file {}.".format(filename))
            logger.debug(e)

    def addRemote(self, name, url):
        """Add a remote.

        Args:
            name: A string containing the name of the remote.
            url: A string containing the url to the remote.
        """
        try:
            self.repo.remotes.create(name, url)
            logger.info("Successfully added remote: {} - {}".format(name, url))
        except Exception as e:
            logger.info("Could not add remote: {} - {}".format(name, url))
            logger.debug(e)

        try:
            self.repo.remotes.set_push_url(name, url)
            self.repo.remotes.set_url(name, url)
        except Exception as e:
            logger.info("Could not set push/fetch urls: {} - {}".format(
                name, url))
            logger.debug(e)

    def checkout(self, commitid):
        """Checkout a commit by a commit id.

        Args:
            commitid: A string containing a commit id.
        """
        try:
            commit = self.repo.revparse_single(commitid)
            self.repo.set_head(commit.oid)
            self.repo.reset(commit.oid, GIT_RESET_HARD)
            logger.info("Checked out commit: {}".format(commitid))
        except Exception as e:
            logger.info("Could not check out commit: {}".format(commitid))
            logger.debug(e)

    def commit(self, message=None):
        """Commit staged files.

        Args:
            message: A string for the commit message.
        Raises:
            Exception: If no files in staging area.
        """
        if self.isstagingareaclean():
            # nothing to commit
            return

        index = self.repo.index
        index.read()
        tree = index.write_tree()

        try:
            author = Signature(self.author_name, self.author_email)
            committer = Signature(self.author_name, self.author_email)

            if len(self.repo.listall_reference_objects()) == 0:
                # Initial commit
                if message is None:
                    message = 'Initial Commit from QuitStore'
                self.repo.create_commit('HEAD', author, committer, message,
                                        tree, [])
            else:
                if message is None:
                    message = 'New Commit from QuitStore'
                self.repo.create_commit('HEAD', author, committer, message,
                                        tree,
                                        [self.repo.head.get_object().hex])
            logger.info('Updates committed')
        except Exception as e:
            logger.info('Nothing to commit')
            logger.debug(e)

        if self.gc:
            self.garbagecollection()

    def commitexists(self, commitid):
        """Check if a commit id is part of the repository history.

        Args:
            commitid: String of a Git commit id.
        Returns:
            True, if commitid is part of the commit log.
            False, otherwise.
        """
        return commitid in self.getids()

    def garbagecollection(self):
        """Start garbage collection.

        Args:
            commitid: A string cotaining a commitid.
        """
        try:
            # Check if the garbage collection process is still running
            if self.gcProcess is None or self.gcProcess.poll() is not None:
                # Start garbage collection with the "--auto" option,
                # which terminates immediately if it is not necessary
                self.gcProcess = Popen(["git", "gc", "--auto", "--quiet"],
                                       cwd=self.path)
                logger.debug('Spawn garbage collection')
        except Exception as e:
            logger.debug('Git garbage collection failed to spawn')
            logger.debug(e)

    def getpath(self):
        """Return the path of the git repository.

        Returns:
            A string containing the path to the directory of git repo
        """
        return self.path

    def getcommits(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        commits = []
        if len(self.repo.listall_reference_objects()) > 0:
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                commits.append({
                    'id': str(commit.oid),
                    'message': str(commit.message),
                    'commit_date': datetime.fromtimestamp(
                        commit.commit_time).strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'author_name': commit.author.name,
                    'author_email': commit.author.email,
                    'parents': [c.hex for c in commit.parents],
                })
        return commits

    def getids(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        ids = []
        if len(self.repo.listall_reference_objects()) > 0:
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                ids.append(str(commit.oid))
        return ids

    def isgarbagecollectionon(self):
        """Return if gc is activated or not.

        Returns:
            True, if activated
            False, if not
        """
        return self.gc

    def isstagingareaclean(self):
        """Check if staging area is clean.

        Returns:
            True, if the staging area is clean
            False, otherwise.
        """
        status = self.repo.status()

        for filepath, flags in status.items():
            if flags != GIT_STATUS_CURRENT:
                return False

        return True

    def pull(self, remote='origin', branch='master'):
        """Pull if possible.

        Return:
            True: If successful.
            False: If merge not possible or no updates from remote.
        """
        try:
            self.repo.remotes[remote].fetch()
        except Exception as e:
            logger.info("Can not pull:  Remote {} not found.".format(remote))
            logger.debug(e)

        ref = 'refs/remotes/' + remote + '/' + branch
        remoteid = self.repo.lookup_reference(ref).target
        analysis, _ = self.repo.merge_analysis(remoteid)

        if analysis & GIT_MERGE_ANALYSIS_UP_TO_DATE:
            # Already up-to-date
            pass
        elif analysis & GIT_MERGE_ANALYSIS_FASTFORWARD:
            # fastforward
            self.repo.checkout_tree(self.repo.get(remoteid))
            master_ref = self.repo.lookup_reference('refs/heads/master')
            master_ref.set_target(remoteid)
            self.repo.head.set_target(remoteid)
        elif analysis & GIT_MERGE_ANALYSIS_NORMAL:
            self.repo.merge(remoteid)
            tree = self.repo.index.write_tree()
            msg = 'Merge from ' + remote + ' ' + branch
            author = Signature(self.author_name, self.author_email)
            comitter = Signature(self.author_name, self.author_email)
            self.repo.create_commit('HEAD', author, comitter, msg, tree,
                                    [self.repo.head.target, remoteid])
            self.repo.state_cleanup()
        else:
            logger.debug('Cannot pull: unknown merge analysis result')

    def push(self, remote='origin', branch='master'):
        """Push if possible.

        Return:
            True: If successful.
            False: If diverged or nothing to push.
        """
        ref = ['refs/heads/' + branch]

        try:
            remo = self.repo.remotes[remote]
        except Exception as e:
            logger.info(
                "Cannot push: remote {} does not exist.".format(remote))
            logger.debug(e)
            return

        try:
            remo.push(ref, callbacks=self.callback)
        except Exception as e:
            logger.info("Can not push to {} with ref {}".format(
                remote, str(ref)))
            logger.debug(e)

    def getRemotes(self):
        remotes = {}

        try:
            for remote in self.repo.remotes:
                remotes[remote.name] = [remote.url, remote.push_url]
        except Exception as e:
            logger.info('No remotes found.')
            logger.debug(e)
            return {}

        return remotes
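
# A brief usage sketch for GitRepo, with a hypothetical path and origin URL.
repo = GitRepo('/tmp/quit-store', origin='https://example.org/data.git')
repo.addall()                    # stage everything matching self.pathspec
repo.commit('Store snapshot')    # no-op if the staging area is clean
repo.push()                      # push refs/heads/master to origin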
Example #6
def mirror_tarballs(target_dir,
                    tmp_dir,
                    git_repo,
                    git_revision,
                    concurrent=DEFAULT_CONCURRENT_DOWNLOADS):
    global failed_entries
    global download_queue
    create_mirror_dirs(target_dir, git_revision)
    download_queue = queue.Queue()
    threads = []
    repo_path = os.path.join(tmp_dir, "nixpkgs")
    os.makedirs(repo_path, exist_ok=True)
    with ccd(repo_path):
        exists = False
        try:
            repo = Repository(os.path.join(repo_path, ".git"))
            repo.remotes["origin"].fetch()
            exists = True
        except (GitError, KeyError):
            # No usable checkout yet; fall through to a fresh clone.
            pass
        if not exists:
            repo = clone_repository(git_repo, repo_path)
        repo.reset(git_revision, GIT_RESET_HARD)
        with ccd(repo.workdir):
            success = False
            env = os.environ.copy()
            env["NIX_PATH"] = "nixpkgs={}".format(repo.workdir)
            for expr in NIX_EXPRS:
                res = subprocess.run(nix_instantiate_cmd(expr),
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     env=env)
                if res.returncode != 0:
                    print("nix instantiate failed!")
                else:
                    success = True
                    break
            if not success:
                return "fatal: all nix instantiate processes failed!"
            output = json.loads(res.stdout.decode('utf-8').strip())
    # with open(os.path.join(target_dir, "tars.json"), "w") as f:
    #     f.write(json.dumps(output))
    # with open(os.path.join(target_dir, "tars.json"), "r") as f:
    #     output = json.loads(f.read())
    for entry in output:
        if not any(entry['url'].startswith(scheme)
                   for scheme in VALID_URL_SCHEMES):
            append_failed_entry(entry)
            print("url {} is not in the supported url schemes.".format(
                entry['url']))
            continue
        elif (check_presence(target_dir, entry['hash'])
              or check_presence(target_dir, entry['name'])):
            print("url {} already mirrored".format(entry['url']))
            continue
        else:
            download_queue.put(entry)
    for i in range(concurrent):
        t = threading.Thread(target=download_worker,
                             args=(
                                 target_dir,
                                 git_revision,
                                 repo.workdir,
                             ))
        threads.append(t)
        t.start()
    download_queue.join()
    for i in range(concurrent):
        download_queue.put(None)
    for t in threads:
        t.join()
    log = "########################\n"
    log += "SUMMARY OF FAILED FILES:\n"
    log += "########################\n"
    for entry in failed_entries:
        log += "url:{}, name:{}\n".format(entry['url'], entry['name'])
    with open(os.path.join(target_dir, "revisions", git_revision, "log"),
              "w") as f:
        f.write(log)
    return log
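
# download_worker(), like ccd(), check_presence(), and append_failed_entry(),
# is a helper defined elsewhere in the original script. A plausible sketch of
# the worker loop consuming download_queue follows; fetch_and_store() is
# hypothetical and stands in for the actual download logic.
def download_worker(target_dir, git_revision, nixpkgs_dir):
    while True:
        entry = download_queue.get()
        if entry is None:
            # Sentinel pushed by mirror_tarballs() to stop the worker.
            download_queue.task_done()
            break
        try:
            fetch_and_store(target_dir, git_revision, entry)  # hypothetical helper
        except Exception:
            failed_entries.append(entry)
        finally:
            download_queue.task_done()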