# Example 1
def _fetch_file_content_hashes(cache_file, repo_directory, repo_url):
    """Update the cached file-content hashes with any new commits and return them.

    Loads the cache stored in *cache_file* (a dict with 'commits' and 'hashes'
    lists), detects commits not seen before, hashes the file contents they
    introduce, persists the merged result, and returns the full set of known
    content hashes.
    """
    # open (or clone) the repository
    repo = git_tool.get_repository(repo_directory, repo_url)

    # previously cached state
    cache = load(cache_file)
    known_commit_hexshas = cache['commits']
    print('{:6} {:4} existing commits'.format('', len(known_commit_hexshas)))

    # determine which commits are new since the last run
    every_commit = get_all_commit_list(repo)
    fresh_commits = get_new_commit_list(every_commit, known_commit_hexshas)
    print('{:6} {:4} new commits'.format('found', len(fresh_commits)))

    # hash the file contents introduced by the new commits
    fresh_hashes = get_content_hash_list(repo_directory, repo, fresh_commits)

    # persist only when something actually changed
    if fresh_commits:
        # keep hashes of erased commits too, in case history was rewritten
        for fresh_hash in fresh_hashes:
            if fresh_hash not in cache['hashes']:
                cache['hashes'].append(fresh_hash)

        cache['commits'] = known_commit_hexshas + commit_list_to_hexsha_list(fresh_commits)

        save(cache_file, cache)

    return set(cache['hashes'])
# Example 2
def fetch_hashes_from_git_url(git_url=None):
    """Fetch commits from *git_url* and store per-commit content hashes in the DB.

    Registers the fork (creating its row if needed), links the commits already
    known to the database to that fork, then inserts each new commit together
    with its file-content hashes, one SQL transaction per commit.
    """
    # get repo_directory from the repo_url
    repo_directory = get_git_repository_path(git_url)
    # get (or initialize) the git repository
    repo = git_tool.get_repository(repo_directory, git_url)
    # get commit list
    all_commit_list = get_all_commit_list(repo)
    # forks
    with get_engine().connect() as connection:
        with new_session(bind=connection) as session:
            # get (or create) the Fork row for this URL
            fork_obj = session.execute(
                                    select(Fork)
                                    .where(Fork.git_url == git_url)
                               ).scalar()
            if fork_obj is None:
                fork_obj = Fork(git_url=git_url)
                session.add(fork_obj)

            # commits already present in the database
            existing_commit_obj = session.execute(
                                    select(Commit)
                                    .where(Commit.sha.in_([commit.hexsha for commit in all_commit_list]))
                                  ).all()
            # use a set for O(1) membership tests below instead of a linear
            # tuple scan per commit (shas are assumed unique per row)
            existing_commit_sha_set = {commit_obj[0].sha for commit_obj in existing_commit_obj}
            new_commit_list = [commit for commit in all_commit_list if commit.hexsha not in existing_commit_sha_set]

            print('  {:5} existing commit(s)'.format(len(existing_commit_sha_set)))
            # make sure every already-known commit is linked to this fork
            for row in existing_commit_obj:
                commit_obj = row[0]
                if fork_obj not in commit_obj.forks:
                    commit_obj.forks.append(fork_obj)
                    session.add(commit_obj)
            session.commit()

            # release references before the long-running insert loop below
            del fork_obj
            del existing_commit_obj
            del existing_commit_sha_set

        print('  {:5} new commit(s)'.format(len(new_commit_list)))

        commit_iterator = get_content_list_per_commit_iterator(repo_directory, repo, new_commit_list)
        for commit_sha, content_hash_list in commit_iterator:
            # one SQL transaction per commit
            with new_session(bind=connection) as session:
                insert_commit(session, git_url, commit_sha, content_hash_list)
                session.commit()
def load_searx_instances() -> dict:
    """Clone/update the searx-instances repository and return its instance model.

    Returns whatever the repository's ``searxinstances.model.load()`` produces.
    """
    repo_directory = get_git_repository_path(SEARXINSTANCES_GIT_REPOSITORY)
    # make sure the repository is cloned / up to date before importing from it
    get_repository(repo_directory, SEARXINSTANCES_GIT_REPOSITORY)
    # Fix: import_module's second argument is the *package* anchor used only to
    # resolve relative names; it is ignored for the absolute name given here,
    # so passing the filesystem path repo_directory was meaningless. The import
    # still relies on repo_directory being on sys.path — presumably arranged by
    # get_repository(); TODO confirm.
    model_module = import_module('searxinstances.model')
    return model_module.load()