def test_orc_datalad_pair_new_submodule(job_spec, dataset, shell):
    with chpwd(dataset.path):
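        # First run: create the remote repository.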
        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        orc.fetch()

        # prepare_remote() doesn't fail when a new subdataset is added after
        # the first run.
        sub = dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo a >sub/a'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        orc.fetch()
        assert sub.repo.is_under_annex("a")


def test_orc_datalad_pair_need_follow_parent(job_spec, dataset, shell):
    # An example of a scenario that fails without DataLad's --follow=parentds.
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo baz >baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc0 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

        job_spec["_resolved_command_str"] = "sh -c 'echo bar >sub/bar'"
        output = op.join("sub", "bar")
        job_spec["outputs"] = [output]
        orc1 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc1.prepare_remote()
        orc1.submit()
        orc1.follow()
        orc1.fetch()
        assert op.exists(output)


def test_orc_datalad_abort_if_dirty(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc0 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # Run one job so that we create the remote repository.
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

    with chpwd(dataset.path):
        orc1 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
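        # Dirty the remote working directory so that prepare_remote() refuses
        # to run another job there.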
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        assert "dirty" in str(exc)


def test_orc_datalad_pair_submodule_conflict(caplog, job_spec, dataset, shell):
    # In this scenario, one job modifies a submodule, and before that change
    # is fetched, another job is launched that modifies the same submodule.
    # This creates a change that can't be brought in with `datalad update`
    # because, even with --follow=parentds, the top-level repo still brings in
    # changes from the remote, whose branch points to the first job. In a
    # diagram, the remote state is:
    #
    #         ---- job 1 (branch)
    #  base --|
    #         ---- job 2 (detached)
    #
    # On fetch of job 2, we merge the job 2 ref. The `datalad update` call
    # then fails trying to merge in the branch, which still points to job 1.
    #
    # If this scenario ends up being common enough, we could consider modifying
    # `datalad update` to optionally not try to merge the remote state of the
    # top-level repo.
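    #
    # A rough git-level sketch of such a remote history (illustrative only;
    # the orchestrators create this state themselves):
    #
    #   git commit --allow-empty -m 'base'
    #   git commit --allow-empty -m 'job 1'    # remote branch points here
    #   git checkout --detach HEAD~1           # back to base
    #   git commit --allow-empty -m 'job 2'    # detached job 2 commit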
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo baz >sub/baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc0 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

        job_spec["_resolved_command_str"] = "sh -c 'echo bar >sub/bar'"
        orc1 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc1.prepare_remote()
        orc1.submit()
        orc1.follow()
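        # Fetching job 2 merges its ref; the subsequent `datalad update` then
        # fails to merge the remote branch, which points to job 1.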
        # swallow_logs() won't work here because it hard codes the logger and
        # the log message being checked is bubbled up by DataLad.
        caplog.clear()
        with caplog.at_level(logging.ERROR):
            orc1.fetch()
        assert "CONFLICT" in caplog.text
        assert dataset.repo.call_git(["ls-files", "--unmerged"]).strip()


def test_orc_datalad_abort_if_detached(job_spec, dataset, shell):
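    # Detach HEAD so that prepare_remote() aborts.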
    dataset.repo.checkout("HEAD^{}")

    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        with pytest.raises(OrchestratorError):
            orc.prepare_remote()


def test_orc_datalad_resurrect(job_spec, dataset, shell):
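    # With resurrection=True, the orchestrator is reconstructed from the job
    # spec rather than from a new submission, so these values only need to be
    # present.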
    for k in ["jobid",
              "working_directory", "root_directory", "local_directory"]:
        job_spec[k] = "doesn't matter"
    job_spec["head"] = "deadbee"
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec,
            resurrection=True)
    assert orc.head == "deadbee"


def test_orc_datalad_pair(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()

        orc.fetch()
        # The local fetch variant doesn't currently get the content, so just
        # check that the file is under annex.
        assert dataset.repo.is_under_annex("out")


def test_orc_datalad_pair_existing_remote(job_spec, dataset, shell):
    dataset.repo.add_remote("localshell", "i-dont-match")
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        # If a remote with the resource name exists, we abort if the
        # URL doesn't match the expected target...
        with pytest.raises(OrchestratorError):
            orc.prepare_remote()
        # ... and continue if it does.
        dataset.repo.set_remote_url("localshell", orc.working_directory)
        orc.prepare_remote()


def test_orc_datalad_pair_submodule(job_spec, dataset, shell):
    # Smoke test that triggers the failure from gh-499
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo foo >sub/foo'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        orc.fetch()


def test_orc_datalad_pair_merge_conflict(job_spec, dataset, shell):
    with chpwd(dataset.path):
        job_spec["_resolved_command_str"] = "sh -c 'echo baz >baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc0 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()
        # Introduce a conflict: commit different content for "baz" locally.
        (dataset.pathobj / "baz").write_text("different")
        dataset.save()
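        # The failed merge is reported as a warning, and the conflict is left
        # in the index for the user to resolve.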
        with swallow_logs(new_level=logging.WARNING) as logs:
            orc0.fetch()
            assert "Failed to merge in changes" in logs.out
        assert dataset.repo.call_git(["ls-files", "--unmerged"]).strip()