Example #1
0
def test_fast_import(repo_version, data_archive, tmp_path, cli_runner, chdir):
    """Fast-import one GPKG layer into a freshly initialised repo and check it.

    Verifies that the import leaves a single commit on ``master``, that the
    imported dataset reports the requested repo version and has metadata, and
    that it holds as many features as the import source advertised.
    """
    table = H.POINTS.LAYER
    with data_archive("gpkg-points") as data:
        # Create an empty working directory for the new repository.
        repo_path = tmp_path / "data.sno"
        repo_path.mkdir()

        with chdir(repo_path):
            init_result = cli_runner.invoke(
                ["init", "--repo-version", repo_version]
            )
            assert init_result.exit_code == 0, init_result

            repo = pygit2.Repository(str(repo_path))

            gpkg_path = data / "nz-pa-points-topo-150k.gpkg"
            source = OgrImportSource.open(gpkg_path, table=table)

            fast_import.fast_import_tables(repo, [source])

            # The import must have produced a commit on the master branch.
            assert not repo.is_empty
            assert repo.head.name == "refs/heads/master"
            assert repo.head.shorthand == "master"

            dataset = structure.RepositoryStructure(repo)[table]

            # Exactly one commit is reachable from HEAD.
            commits = list(repo.walk(repo.head.target))
            assert len(commits) == 1
            assert dataset.version == int(repo_version)
            assert list(dataset.meta_items())

            # Every feature offered by the source ended up in the dataset.
            imported_features = 0
            for _ in dataset.features():
                imported_features += 1
            assert imported_features == source.feature_count
Example #2
0
def _upgrade_commit(
    i,
    source_repo,
    source_commit,
    source_version,
    source_dataset_class,
    dest_parents,
    dest_repo,
    commit_map,
):
    """Re-import one source commit into ``dest_repo`` and record the mapping.

    All datasets found at ``source_commit`` are passed to ``fast_import_tables``
    together with a fast-import commit header that copies the source commit's
    author, committer and message, and lists ``dest_parents`` as ``merge``
    lines. Afterwards ``commit_map`` gains an entry from the source commit hex
    to the hex of the new HEAD commit of ``dest_repo``, and a one-line progress
    summary prefixed with ``i`` is echoed.
    """
    source_structure = RepositoryStructure(
        source_repo,
        commit=source_commit,
        version=source_version,
        dataset_class=source_dataset_class,
    )
    sources = list(source_structure)
    dataset_count = len(sources)
    feature_count = sum(ds.feature_count for ds in sources)

    author = source_commit.author
    committer = source_commit.committer
    author_stamp = f"{author.time} {minutes_to_tz_offset(author.offset)}"
    committer_stamp = (
        f"{source_commit.commit_time}"
        f" {minutes_to_tz_offset(source_commit.commit_time_offset)}"
    )
    message = source_commit.message

    header_lines = [
        # Every commit is written to refs/heads/master; the real branch heads
        # are fixed up later.
        "commit refs/heads/master\n",
        f"author {author.name} <{author.email}> {author_stamp}\n",
        f"committer {committer.name} <{committer.email}> {committer_stamp}\n",
        f"data {len(message.encode('utf8'))}\n{message}\n",
    ]
    header_lines.extend(f"merge {parent}\n" for parent in dest_parents)
    header = "".join(header_lines)

    fast_import_tables(
        dest_repo,
        sources,
        replace_existing=ReplaceExisting.ALL,
        quiet=True,
        header=header,
        # Unrelated commits all land on refs/heads/master, so that branch head
        # jumps around; git-fast-import only permits this with --force.
        extra_cmd_args=["--force"],
    )

    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    committed_at = datetime.fromtimestamp(source_commit.commit_time)
    summary = (
        f"{committed_at}; {source_commit.committer.name};"
        f" {dataset_count} datasets; {feature_count} rows"
    )
    click.echo(
        f"  {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]} ({summary})"
    )
Example #3
0
def _raw_author_time(commit):
    """Return the commit's author date in git fast-import "raw" format.

    The raw format is ``<unix-seconds> <+HHMM|-HHMM>``. The author signature
    carries its UTC offset in minutes, so it is split into hours and minutes.
    """
    author = commit.author
    sign = "-" if author.offset < 0 else "+"
    hours, minutes = divmod(abs(author.offset), 60)
    return f"{author.time} {sign}{hours:02d}{minutes:02d}"


def _upgrade_commit(i, source_repo, source_commit, dest_parents, dest_repo,
                    commit_map):
    """Re-import one source commit into ``dest_repo`` as a version 2 commit.

    Each dataset found at ``source_commit`` is wrapped in ``ImportV1Dataset``
    and passed to ``fast_import_tables`` with a fast-import commit header that
    copies the source commit's author, committer and message, and lists
    ``dest_parents`` as ``merge`` lines. Afterwards ``commit_map`` gains an
    entry from the source commit hex to the hex of the new HEAD commit of
    ``dest_repo``, and a one-line progress summary prefixed with ``i`` is
    echoed.

    The author line carries the author's own timestamp rather than the
    committer's, so commits whose author and commit dates differ keep their
    original author date.
    """
    source_repo_structure = RepositoryStructure(source_repo,
                                                commit=source_commit)
    sources = {
        dataset.path: ImportV1Dataset(dataset)
        for dataset in source_repo_structure
    }
    dataset_count = len(sources)
    feature_count = sum(s.row_count for s in sources.values())

    s = source_commit
    author_time = _raw_author_time(s)
    # NOTE(review): _raw_commit_time is defined outside this block; presumably
    # it yields the committer timestamp in the same raw format - confirm.
    committer_time = _raw_commit_time(s)
    header = (
        "commit refs/heads/master\n"
        f"author {s.author.name} <{s.author.email}> {author_time}\n"
        f"committer {s.committer.name} <{s.committer.email}> {committer_time}\n"
        f"data {len(s.message.encode('utf8'))}\n{s.message}\n")
    header += "".join(f"merge {p}\n" for p in dest_parents)

    fast_import_tables(
        dest_repo,
        sources,
        incremental=False,
        quiet=True,
        header=header,
        structure_version=2,
    )

    dest_commit = dest_repo.head.peel(pygit2.Commit)
    commit_map[source_commit.hex] = dest_commit.hex

    # Separate name for the display value: this is a datetime, not the raw
    # timestamp string used in the header.
    committed_at = datetime.fromtimestamp(source_commit.commit_time)
    click.echo(
        f"  {i}: {source_commit.hex[:8]} → {dest_commit.hex[:8]}"
        f" ({committed_at}; {source_commit.committer.name};"
        f" {dataset_count} datasets; {feature_count} rows)"
    )