Example No. 1
 def test_attach_none(self, db_session):
     """ Test expected failure when we try to attach to a dataset that
     does not exist.
     """
     with pytest.raises(DatasetNotFound):
         Dataset.attach(controller="frodo",
                        name="venus",
                        state=States.UPLOADING)
Example No. 2
    def process(self, link: str, state: States) -> int:
        """
        process Create Dataset records for pre-existing server tarballs that
        are in a specified filesystem "state" (the link directory in the
        archive tree), in a specified Dataset state.

        Each tarball for which a Dataset record already exists is IGNORED,
        and we don't attempt to advance the state.

        Args:
            :link (str):        Filesystem "state" link directory
                                (e.g., TO-INDEX)
            :state (States):    A state enum value representing desired Dataset
                                state.

        Returns:
            int: Status (0 success, 1 failure)
        """
        logger = self.logger
        done = 0
        fail = 0
        ignore = 0
        args = {}
        owner = User.validate_user(self.options.user)

        for tarball in self._collect_tb(link):
            if self.options.verify:
                print(f"Processing {tarball} from {link} -> state {state}")
            try:
                args["path"] = tarball
                try:
                    dataset = Dataset.attach(**args)
                    if self.options.verify:
                        print(f"Found existing {dataset}: {dataset.state}")
                    ignore = ignore + 1
                except DatasetNotFound:
                    a = args.copy()
                    a["md5"] = open(f"{tarball}.md5").read().split()[0]

                    # NOTE: including "state" on attach above would attempt to
                    # advance the dataset's state, which we don't want for
                    # import, so we add it only here. "owner" would be ignored
                    # by attach, but we add it here anyway for clarity.
                    a["state"] = state
                    a["owner"] = owner
                    dataset = Dataset.create(**a)
                    print(f"Imported {dataset}: {state}")
                    done = done + 1
            except Exception as e:
                # Stringify any exception and report it; then fail
                logger.exception("Import of dataset {} failed", tarball)
                print(f"{_NAME_}: dataset {tarball} failed with {e}",
                      file=sys.stderr)
                fail = fail + 1
        print(
            f"Imported {done} datasets from {link} with {fail} errors and {ignore} ignored"
        )
        return 1 if fail > 0 else 0
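
The import above reads the digest from the tarball's companion .md5 file. A tiny self-contained illustration of that file's md5sum-style layout ("<digest> <name>") and why split()[0] is enough; the digest and file name below are made up:

import tempfile
from pathlib import Path

# The companion "<tarball>.md5" file uses md5sum-style output, so split()[0]
# yields just the digest. Digest and file name are illustrative only.
with tempfile.TemporaryDirectory() as d:
    md5_file = Path(d, "example.tar.xz.md5")
    md5_file.write_text("d41d8cd98f00b204e9800998ecf8427e  example.tar.xz\n")
    assert md5_file.read_text().split()[0] == "d41d8cd98f00b204e9800998ecf8427e"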
Example No. 3
 def test_dataset_survives_user(self, db_session, create_user):
     """The Dataset isn't automatically removed when the referenced
     user is removed.
     """
     user = create_user
     ds = Dataset(owner=user.username, controller="frodo", name="fio")
     ds.add()
     User.delete(username=user.username)
     ds1 = Dataset.attach(controller="frodo", name="fio")
     assert ds1 == ds
Example No. 4
 def test_construct(self, db_session, create_user):
     """ Test dataset contructor
     """
     user = create_user
     ds = Dataset(owner=user.username, controller="frodo", name="fio")
     ds.add()
     assert ds.owner == user
     assert ds.controller == "frodo"
     assert ds.name == "fio"
     assert ds.state == States.UPLOADING
     assert ds.md5 is None
     assert ds.created <= ds.transition
     assert ds.id is not None
     assert "test(1)|frodo|fio" == str(ds)
Example No. 5
    def test_attach_filename(self, db_session, create_user):
        """ Test that we can create a dataset using the full tarball
        file path.
        """
        ds1 = Dataset(owner="test",
                      path="/foo/bilbo/rover.tar.xz",
                      state=States.QUARANTINED)
        ds1.add()

        ds2 = Dataset.attach(controller="bilbo", name="rover")
        assert ds2.owner == ds1.owner
        assert ds2.controller == ds1.controller
        assert ds2.name == ds1.name
        assert ds2.state == States.QUARANTINED
        assert ds2.md5 is ds1.md5
        assert ds2.id is ds1.id
Example No. 6
    def test_attach_controller_path(self, db_session, create_user):
        """ Test that we can attach using controller and name to a
        dataset created by file path.
        """
        ds1 = Dataset(
            owner=create_user.username,
            path="/foo/frodo/fio.tar.xz",
            state=States.INDEXING,
        )
        ds1.add()

        ds2 = Dataset.attach(controller="frodo", name="fio")
        assert ds2.owner == ds1.owner
        assert ds2.controller == ds1.controller
        assert ds2.name == ds1.name
        assert ds2.state == States.INDEXING
        assert ds2.md5 is ds1.md5
        assert ds2.id is ds1.id
Example No. 7
 def test_construct_bad_state(self, db_session, create_user):
     """Test with a non-States state value
     """
     with pytest.raises(DatasetBadParameterType):
         Dataset(
             owner=create_user.username,
             controller="frodo",
             name="fio",
             state="notStates",
         )
Example No. 8
    def test_attach_exists(self, db_session, create_user):
        """ Test that we can attach to a dataset
        """
        ds1 = Dataset(
            owner=create_user.username,
            controller="frodo",
            name="fio",
            state=States.INDEXING,
        )
        ds1.add()

        ds2 = Dataset.attach(controller="frodo",
                             name="fio",
                             state=States.INDEXED)
        assert ds2.owner == ds1.owner
        assert ds2.controller == ds1.controller
        assert ds2.name == ds1.name
        assert ds2.state == States.INDEXED
        assert ds2.md5 is ds1.md5
        assert ds2.id is ds1.id
Example No. 9
 def test_lifecycle(self, db_session, create_user):
     """ Advance a dataset through the entire lifecycle using the state
     transition dict.
     """
     ds = Dataset(owner=create_user.username,
                  controller="frodo",
                  name="fio")
     ds.add()
     assert ds.state == States.UPLOADING
     beenthere = [ds.state]
     while ds.state in Dataset.transitions:
         advances = Dataset.transitions[ds.state]
         for n in advances:
             if n not in beenthere:
                 next = n
                 break
         else:
             break  # avoid infinite reindex loop!
         beenthere.append(next)
         ds.advance(next)
         assert ds.state == next
     lifecycle = ",".join([s.name for s in beenthere])
     assert (
         lifecycle ==
         "UPLOADING,UPLOADED,UNPACKING,UNPACKED,INDEXING,INDEXED,EXPIRING,EXPIRED"
     )
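
The loop above walks Dataset.transitions, which is not part of this snippet. A sketch of the shape such a table would need for the test to work; the successors below are inferred from the asserted lifecycle string, not taken from the real model, and the "reindex loop" guard suggests the real table also allows something like INDEXED -> INDEXING, which is omitted here:

# Sketch only: a mapping from each state to the states it may advance to,
# with successors inferred from the lifecycle string asserted above.
transitions_sketch = {
    States.UPLOADING: [States.UPLOADED],
    States.UPLOADED: [States.UNPACKING],
    States.UNPACKING: [States.UNPACKED],
    States.UNPACKED: [States.INDEXING],
    States.INDEXING: [States.INDEXED],
    States.INDEXED: [States.EXPIRING],
    States.EXPIRING: [States.EXPIRED],
}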
Example No. 10
def attach_dataset(monkeypatch, pbench_token, create_user):
    """
    Mock a Dataset attach call to return an object. We mock the Dataset.attach
    method to avoid DB access here; however, the user authentication mechanism
    is not yet mocked, so we have to look up User data.

    Args:
        monkeypatch: patching fixture
        pbench_token: create a "drb" user for testing
        create_user: create a "test" user
    """
    datasets = {}
    drb = User.query(username="drb")  # Created by pbench_token fixture
    test = User.query(username="test")  # Created by create_user fixture
    datasets["drb"] = Dataset(
        owner=drb,
        owner_id=drb.id,
        controller="node",
        name="drb",
        access="private",
        id=1,
    )
    datasets["test"] = Dataset(
        owner=test,
        owner_id=test.id,
        controller="node",
        name="test",
        access="private",
        id=2,
    )

    def attach_dataset(controller: str, name: str) -> Dataset:
        return datasets[name]

    with monkeypatch.context() as m:
        m.setattr(Dataset, "attach", attach_dataset)
        yield
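
Assuming the function above is registered as a pytest fixture (the decorator is not shown here), a test could rely on the patched attach as in this minimal sketch; the test name and assertions are illustrative:

def test_attach_is_mocked(attach_dataset):
    # With the fixture active, Dataset.attach returns the canned object for
    # the requested name without touching the database.
    ds = Dataset.attach(controller="node", name="drb")
    assert ds.name == "drb"
    assert ds.access == "private"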
Example No. 11
 def test_advanced_bad_state(self, db_session, create_user):
     """Test with a non-States state value
     """
     ds = Dataset(owner=create_user.username,
                  controller="frodo",
                  name="fio")
     ds.add()
     with pytest.raises(DatasetBadParameterType):
         ds.advance("notStates")
Example No. 12
 def test_advanced_illegal(self, db_session, create_user):
     """ Test that we can't advance to a state that's not a
     successor to the initial state.
     """
     ds = Dataset(owner=create_user.username,
                  controller="frodo",
                  name="fio")
     ds.add()
     with pytest.raises(DatasetBadStateTransition):
         ds.advance(States.EXPIRED)
Example No. 13
 def test_advanced_good(self, db_session, create_user):
     """ Test advancing the state of a dataset
     """
     ds = Dataset(owner=create_user.username,
                  controller="frodo",
                  name="fio")
     ds.add()
     ds.advance(States.UPLOADED)
     assert ds.state == States.UPLOADED
     assert ds.created <= ds.transition
Example No. 14
 def test_advanced_terminal(self, db_session, create_user):
     """ Test that we can't advance from a terminal state
     """
     ds = Dataset(
         owner=create_user.username,
         controller="frodo",
         name="fio",
         state=States.EXPIRED,
     )
     ds.add()
     with pytest.raises(DatasetTerminalStateViolation):
         ds.advance(States.UPLOADING)
Example No. 15
    def test_metadata_remove(self, db_session, create_user):
        """ Test that we can remove a Metadata key
        """
        ds = Dataset.create(owner=create_user.username,
                            controller="frodo",
                            name="fio")
        assert ds.metadatas == []
        m = Metadata(key=Metadata.ARCHIVED, value="TRUE")
        m.add(ds)
        assert ds.metadatas == [m]

        Metadata.remove(ds, Metadata.ARCHIVED)
        assert ds.metadatas == []
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.ARCHIVED)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.ARCHIVED

        Metadata.remove(ds, Metadata.REINDEX)
        assert ds.metadatas == []
Example No. 16
    def preprocess(self, client_json: JSON) -> CONTEXT:
        """
        Query the Dataset associated with this name, and determine whether the
        authenticated user has UPDATE access to this dataset. (Currently, this
        means the authenticated user is the owner of the dataset, or has ADMIN
        role.)

        If the user has authorization to update the dataset, return the dataset
        object as CONTEXT so that the postprocess operation can mark it as
        published.

        Args:
            client_json: JSON dictionary of type-normalized key-value pairs
                controller: the controller that generated the dataset
                name: name of the dataset to publish
                access: The desired access level of the dataset (currently either
                    "private" or "public")

        Returns:
            CONTEXT referring to the dataset object if the operation should
            continue, or None
        """
        dataset = Dataset.attach(controller=client_json["controller"],
                                 name=client_json["name"])
        owner = User.query(id=dataset.owner_id)
        if not owner:
            self.logger.error("Dataset owner ID {} cannot be found in Users",
                              dataset.owner_id)
            abort(HTTPStatus.INTERNAL_SERVER_ERROR,
                  message="Dataset owner not found")

        # For publish, we check authorization against the ownership of the
        # dataset that was selected rather than having an explicit "user"
        # JSON parameter. This will raise UnauthorizedAccess on failure.
        self._check_authorization(owner.username, client_json["access"])

        # The dataset exists, so continue the operation with the appropriate
        # CONTEXT.
        return {"dataset": dataset}
Example No. 17
    def test_upload(
        self, client, pytestconfig, caplog, server_config, setup_ctrl, pbench_token
    ):
        # This is a really weird and ugly file name that should be
        # maintained through all the marshalling and unmarshalling on the
        # wire until it lands on disk and in the Dataset.
        filename = "pbench-user-benchmark_some + config_2021.05.01T12.42.42.tar.xz"
        tmp_d = pytestconfig.cache.get("TMP", None)
        datafile = Path(tmp_d, filename)
        file_contents = b"something\n"
        md5 = hashlib.md5()
        md5.update(file_contents)
        datafile.write_bytes(file_contents)

        with datafile.open("rb") as data_fp:
            response = client.put(
                self.gen_uri(server_config, filename),
                data=data_fp,
                headers=self.gen_headers(pbench_token, md5.hexdigest()),
            )

        assert response.status_code == HTTPStatus.CREATED, repr(response)
        tmp_d = pytestconfig.cache.get("TMP", None)
        receive_dir = Path(
            tmp_d, "srv", "pbench", "pbench-move-results-receive", "fs-version-002"
        )
        assert (
            receive_dir.exists()
        ), f"receive_dir = '{receive_dir}', filename = '{filename}'"

        dataset = Dataset.attach(controller=self.controller, path=filename)
        assert dataset is not None
        assert dataset.md5 == md5.hexdigest()
        assert dataset.controller == self.controller
        assert dataset.name == filename[:-7]
        assert dataset.state == States.UPLOADED

        for record in caplog.records:
            assert record.levelname == "INFO"
Example No. 18
def backup_data(lb_obj, s3_obj, config, logger):
    qdir = config.QDIR

    tarlist = glob.iglob(
        os.path.join(config.ARCHIVE, "*", _linksrc, "*.tar.xz"))
    ntotal = nbackup_success = nbackup_fail = ns3_success = ns3_fail = nquaran = 0

    for tb in sorted(tarlist):
        ntotal += 1
        # resolve the link
        try:
            tar = Path(tb).resolve(strict=True)
        except FileNotFoundError:
            logger.error(
                "Tarball link, '{}', does not resolve to a real location", tb)
            # Nothing to back up if the link is dangling; skip this entry so
            # the code below never sees an undefined "tar".
            continue

        logger.debug("Start backup of {}.", tar)
        # Check that the tarball exists and is a regular file.
        if not (tar.exists() and tar.is_file()):
            # The tarball does not exist or is not a regular file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file",
                tb,
                tar,
            )
            continue

        archive_md5 = Path(f"{tar}.md5")
        # Check that the md5 file exists and is a regular file.
        if not (archive_md5.exists() and archive_md5.is_file()):
            # The md5 file does not exist or is not a regular file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file",
                tb,
                archive_md5,
            )
            continue

        # read the md5sum from md5 file
        try:
            with archive_md5.open() as f:
                archive_md5_hex_value = f.readline().split(" ")[0]
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}", tb,
                             archive_md5)
            continue

        # match md5sum of the tarball to its md5 file
        try:
            (_, archive_tar_hex_value) = md5sum(tar)
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}", tb, tar)
            continue

        if archive_tar_hex_value != archive_md5_hex_value:
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, md5sum of {} does not match with its md5 file {}",
                tb,
                tar,
                archive_md5,
            )
            continue

        resultname = tar.name
        controller_path = tar.parent
        controller = controller_path.name
        try:
            # This tool can't see a dataset until it's been prepared either
            # by server PUT or by pbench-server-prep-shim-002.py; in either
            # case, the Dataset object must already exist.
            dataset = Dataset.attach(controller=controller, path=resultname)
        except DatasetError as e:
            logger.warning("Trouble tracking {}:{}: {}", controller,
                           resultname, str(e))
            dataset = None

        # This will handle all the local backup related
        # operations and count the number of successes and failures.
        local_backup_result = backup_to_local(
            lb_obj,
            logger,
            controller_path,
            controller,
            tb,
            tar,
            resultname,
            archive_md5,
            archive_md5_hex_value,
        )

        if local_backup_result == Status.SUCCESS:
            nbackup_success += 1
        elif local_backup_result == Status.FAIL:
            nbackup_fail += 1
        else:
            assert (
                False
            ), f"Impossible situation, local_backup_result = {local_backup_result!r}"

        # This will handle all the S3 bucket related operations
        # and count the number of successes and failures.
        s3_backup_result = backup_to_s3(
            s3_obj,
            logger,
            controller_path,
            controller,
            tb,
            tar,
            resultname,
            archive_md5_hex_value,
        )

        if s3_backup_result == Status.SUCCESS:
            ns3_success += 1
        elif s3_backup_result == Status.FAIL:
            ns3_fail += 1
        else:
            assert (
                False
            ), f"Impossible situation, s3_backup_result = {s3_backup_result!r}"

        if local_backup_result == Status.SUCCESS and (
                s3_obj is None or s3_backup_result == Status.SUCCESS):
            # Move tar ball symlink to its final resting place
            rename_tb_link(tb, Path(controller_path, _linkdest), logger)
        else:
            # Do nothing when the backup fails, allowing us to retry on a
            # future pass.
            pass

        if dataset:
            Metadata.create(dataset=dataset,
                            key=Metadata.ARCHIVED,
                            value="True")
        logger.debug("End backup of {}.", tar)

    return Results(
        ntotal=ntotal,
        nbackup_success=nbackup_success,
        nbackup_fail=nbackup_fail,
        ns3_success=ns3_success,
        ns3_fail=ns3_fail,
        nquaran=nquaran,
    )
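
backup_data returns a Results value whose fields mirror the counters above. A minimal sketch of a compatible definition; the real one is not part of this snippet, and the field names below are simply taken from the keyword arguments in the return statement:

from collections import namedtuple

# One compatible shape for the Results value returned above.
Results = namedtuple(
    "Results",
    ["ntotal", "nbackup_success", "nbackup_fail", "ns3_success", "ns3_fail", "nquaran"],
)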
Example No. 19
def reindex(controller_name, tb_name, archive_p, incoming_p, dry_run=False):
    """reindex - re-index the given tar ball name.

    This method is responsible for finding the current symlink to the tar ball
    and moving it to the TO-RE-INDEX directory, creating that directory if
    it does not exist.
    """
    assert tb_name.endswith(".tar.xz"), f"invalid tar ball name, '{tb_name}'"

    if not (incoming_p / controller_name / tb_name[:-7]).exists():
        # Can't re-index tar balls that are not unpacked
        return (controller_name, tb_name, "not-unpacked", "")

    # Construct the controller path object used throughout the rest of this
    # method.
    controller_p = archive_p / controller_name
    # Construct the target path to which all tar ball symlinks will be moved.
    newpath = controller_p.joinpath("TO-RE-INDEX", tb_name)

    paths = []
    _linkdirs = ("TO-INDEX-TOOL", "INDEXED")
    for linkname_p in controller_p.glob(f"*/{tb_name}"):
        # Consider all existing tar ball symlinks
        if linkname_p.parent.name in ("TO-INDEX", "TO-RE-INDEX"):
            msg = (f"WARNING: {linkname_p.parent.name} link already exists for"
                   f" {controller_p / tb_name}")
            # Indicate no action taken, and exit early.
            return (controller_name, tb_name, "exists", msg)
        elif linkname_p.parent.name in _linkdirs:
            # One of the expected symlinks, add it for consideration below.
            paths.append(linkname_p)
        elif linkname_p.parent.name.startswith("WONT-INDEX"):
            # One of the expected WONT-INDEX* symlinks, also added for
            # consideration below.
            paths.append(linkname_p)
        # else:
        #   All other symlinks are not considered.

    if not paths:
        # No existing TO-INDEX or TO-RE-INDEX symlink, and no previous
        # indexing symlinks, exit early.
        return (controller_name, tb_name, "noop", "")

    if len(paths) > 1:
        # If we have more than one path then just flag this as a bad state
        # and exit early.
        return (controller_name, tb_name, "badstate", "")

    # At this point we are guaranteed to have only one path.
    assert len(paths) == 1, f"Logic bomb!  len(paths) ({len(paths)}) != 1"

    try:
        if not dry_run:
            paths[0].rename(newpath)
            ds = Dataset.attach(controller=controller_name, path=tb_name)
            Metadata.create(dataset=ds, key=Metadata.REINDEX, value="True")
    except DatasetError as exc:
        msg = f"WARNING: unable to set REINDEX metadata for {controller_name}:{tb_name}: {str(exc)}"
        res = "error"
    except Exception as exc:
        msg = (f"WARNING: failed to rename symlink '{paths[0]}' to"
               f" '{newpath}', '{exc}'")
        res = "error"
    else:
        msg = ""
        res = "succ"
    return (controller_name, tb_name, res, msg)
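
reindex reports its outcome as a (controller, tarball, status, message) tuple rather than raising. A small sketch of how a caller might tally those results; the driver itself is not shown in this snippet, so the function below is hypothetical:

from collections import Counter

def summarize(results):
    """Tally reindex() outcomes by status and print any warning messages."""
    counts = Counter()
    for controller, tb_name, res, msg in results:
        counts[res] += 1
        if msg:
            print(msg)
    for status, count in sorted(counts.items()):
        print(f"{status}: {count}")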
Example No. 20
    def test_metadata(self, db_session, create_user):
        """ Various tests on Metadata keys
        """
        # See if we can create a metadata row
        ds = Dataset.create(owner=create_user.username,
                            controller="frodo",
                            name="fio")
        assert ds.metadatas == []
        m = Metadata.create(key=Metadata.REINDEX, value="TRUE", dataset=ds)
        assert m is not None
        assert ds.metadatas == [m]

        # Try to get it back
        m1 = Metadata.get(ds, Metadata.REINDEX)
        assert m1.key == m.key
        assert m1.value == m.value
        assert m.id == m1.id
        assert m.dataset_ref == m1.dataset_ref

        # Check the str()
        assert "test(1)|frodo|fio>>REINDEX" == str(m)

        # Try to get a metadata key that doesn't exist
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.TARBALL_PATH)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.TARBALL_PATH

        # Try to remove a metadata key that doesn't exist (No-op)
        Metadata.remove(ds, Metadata.TARBALL_PATH)

        # Try to create a metadata with a bad key
        badkey = "THISISNOTTHEKEYYOURELOOKINGFOR"
        with pytest.raises(MetadataBadKey) as exc:
            Metadata(key=badkey, value=None)
        assert exc.value.key == badkey

        # Try to create a key without a value
        with pytest.raises(MetadataMissingKeyValue):
            Metadata(key=Metadata.REINDEX)

        # Try to add a duplicate metadata key
        with pytest.raises(MetadataDuplicateKey) as exc:
            m1 = Metadata(key=Metadata.REINDEX, value="IRRELEVANT")
            m1.add(ds)
        assert exc.value.key == Metadata.REINDEX
        assert exc.value.dataset == ds
        assert ds.metadatas == [m]

        # Try to add a Metadata key to something that's not a dataset
        with pytest.raises(DatasetBadParameterType) as exc:
            m1 = Metadata(key=Metadata.TARBALL_PATH, value="DONTCARE")
            m1.add("foobar")
        assert exc.value.bad_value == "foobar"
        assert exc.value.expected_type == Dataset.__name__

        # Try to create a Metadata with a bad value for the dataset
        with pytest.raises(DatasetBadParameterType) as exc:
            m1 = Metadata.create(key=Metadata.REINDEX,
                                 value="TRUE",
                                 dataset=[ds])
        assert exc.value.bad_value == [ds]
        assert exc.value.expected_type == Dataset.__name__

        # Try to update the metadata key
        m.value = "False"
        m.update()
        m1 = Metadata.get(ds, Metadata.REINDEX)
        assert m.id == m1.id
        assert m.dataset_ref == m1.dataset_ref
        assert m.key == m1.key
        assert m.value == "False"

        # Delete the key and make sure its gone
        m.delete()
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.REINDEX)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.REINDEX
        assert ds.metadatas == []
Example No. 21
    def process_tb(self, tarballs):
        """Process Tarballs For Indexing and creates report

            "tarballs" - List of tarball, it is the second value of
                the tuple returned by collect_tb() """
        res = 0
        idxctx = self.idxctx
        error_code = self.error_code

        tb_deque = deque(sorted(tarballs))

        # At this point, tarballs contains a list of tar balls sorted by size
        # that were available as symlinks in the various 'linksrc' directories.
        idxctx.logger.debug("Preparing to index {:d} tar balls", len(tb_deque))

        try:
            # Now that we are ready to begin the actual indexing step, ensure we
            # have the proper index templates in place.
            idxctx.logger.debug("update_templates [start]")
            idxctx.templates.update_templates(idxctx.es)
        except TemplateError as e:
            res = self.emit_error(idxctx.logger.error,
                                  "TEMPLATE_CREATION_ERROR", e)
        except SigTermException:
            # Re-raise a SIGTERM to avoid it being lumped in with general
            # exception handling below.
            raise
        except Exception:
            idxctx.logger.exception(
                "update_templates [end]: Unexpected template"
                " processing error")
            res = error_code["GENERIC_ERROR"]
        else:
            idxctx.logger.debug("update_templates [end]")
            res = error_code["OK"]

        if not res.success:
            # Exit early if we encounter any errors.
            return res.value

        report = Report(
            idxctx.config,
            self.name,
            es=idxctx.es,
            pid=idxctx.getpid(),
            group_id=idxctx.getgid(),
            user_id=idxctx.getuid(),
            hostname=idxctx.gethostname(),
            version=VERSION,
            templates=idxctx.templates,
        )
        # We use the "start" report ID as the tracking ID for all indexed
        # documents.
        try:
            tracking_id = report.post_status(tstos(idxctx.time()), "start")
        except SigTermException:
            # Re-raise a SIGTERM to avoid it being lumped in with general
            # exception handling below.
            raise
        except Exception:
            idxctx.logger.error("Failed to post initial report status")
            return error_code["GENERIC_ERROR"].value
        else:
            idxctx.set_tracking_id(tracking_id)

        with tempfile.TemporaryDirectory(prefix=f"{self.name}.",
                                         dir=idxctx.config.TMP) as tmpdir:
            idxctx.logger.debug("start processing list of tar balls")
            tb_list = Path(tmpdir, f"{self.name}.{idxctx.TS}.list")
            try:
                with tb_list.open(mode="w") as lfp:
                    # Write out all the tar balls we are processing so external
                    # viewers can follow along from home.
                    for size, controller, tb in tarballs:
                        print(f"{size:20d} {controller} {tb}", file=lfp)

                indexed = Path(tmpdir, f"{self.name}.{idxctx.TS}.indexed")
                erred = Path(tmpdir, f"{self.name}.{idxctx.TS}.erred")
                skipped = Path(tmpdir, f"{self.name}.{idxctx.TS}.skipped")
                ie_filepath = Path(
                    tmpdir, f"{self.name}.{idxctx.TS}.indexing-errors.json")

                # We use a list object here so that when we close over this
                # variable in the handler, the list object will be closed over,
                # but not its contents.
                sigquit_interrupt = [False]

                def sigquit_handler(*args):
                    sigquit_interrupt[0] = True

                sighup_interrupt = [False]

                def sighup_handler(*args):
                    sighup_interrupt[0] = True

                signal.signal(signal.SIGQUIT, sigquit_handler)
                signal.signal(signal.SIGHUP, sighup_handler)
                count_processed_tb = 0

                try:
                    while len(tb_deque) > 0:
                        size, controller, tb = tb_deque.popleft()
                        # Sanity check source tar ball path
                        linksrc_dir = Path(tb).parent
                        linksrc_dirname = linksrc_dir.name
                        count_processed_tb += 1
                        assert linksrc_dirname == self.linksrc, (
                            f"Logic bomb!  tar ball "
                            f"path {tb} does not contain {self.linksrc}")

                        idxctx.logger.info("Starting {} (size {:d})", tb, size)
                        dataset = None
                        ptb = None
                        userid = None
                        try:
                            path = os.path.realpath(tb)

                            try:
                                dataset = Dataset.attach(
                                    path=path,
                                    state=States.INDEXING,
                                )
                            except DatasetNotFound:
                                idxctx.logger.warn(
                                    "Unable to locate Dataset {}",
                                    path,
                                )
                            except DatasetTransitionError as e:
                                # TODO: This means the Dataset is known, but not in a
                                # state where we'd expect to be indexing it. So what do
                                # we do with it? (Note: this is where an audit log will
                                # be handy; i.e., how did we get here?) For now, just
                                # let it go.
                                idxctx.logger.warn(
                                    "Unable to advance dataset state: {}",
                                    str(e))
                            else:
                                # NOTE: we index the owner_id foreign key not the username.
                                # Although this is technically an integer, I'm clinging to
                                # the notion that we want to keep this as a "keyword" (string)
                                # field.
                                userid = str(dataset.owner_id)

                            # "Open" the tar ball represented by the tar ball object
                            idxctx.logger.debug("open tar ball")
                            ptb = PbenchTarBall(
                                idxctx,
                                userid,
                                path,
                                tmpdir,
                                Path(self.incoming, controller),
                            )

                            # Construct the generator for emitting all actions.  The
                            # `idxctx` dictionary is passed along to each generator so
                            # that it can add its context for error handling to the
                            # list.
                            idxctx.logger.debug("generator setup")
                            if self.options.index_tool_data:
                                actions = ptb.mk_tool_data_actions()
                            else:
                                actions = ptb.make_all_actions()

                            # File name for containing all indexing errors that
                            # can't/won't be retried.
                            with ie_filepath.open(mode="w") as fp:
                                idxctx.logger.debug("begin indexing")
                                try:
                                    signal.signal(signal.SIGINT,
                                                  sigint_handler)
                                    es_res = es_index(
                                        idxctx.es,
                                        actions,
                                        fp,
                                        idxctx.logger,
                                        idxctx._dbg,
                                    )
                                except SigIntException:
                                    idxctx.logger.exception(
                                        "Indexing interrupted by SIGINT, continuing to next tarball"
                                    )
                                    continue
                                finally:
                                    # Turn off the SIGINT handler when not indexing.
                                    signal.signal(signal.SIGINT,
                                                  signal.SIG_IGN)
                        except UnsupportedTarballFormat as e:
                            tb_res = self.emit_error(idxctx.logger.warning,
                                                     "TB_META_ABSENT", e)
                        except BadDate as e:
                            tb_res = self.emit_error(idxctx.logger.warning,
                                                     "BAD_DATE", e)
                        except FileNotFoundError as e:
                            tb_res = self.emit_error(idxctx.logger.warning,
                                                     "FILE_NOT_FOUND_ERROR", e)
                        except BadMDLogFormat as e:
                            tb_res = self.emit_error(idxctx.logger.warning,
                                                     "BAD_METADATA", e)
                        except SigTermException:
                            idxctx.logger.exception(
                                "Indexing interrupted by SIGTERM, terminating")
                            break
                        except Exception as e:
                            tb_res = self.emit_error(idxctx.logger.exception,
                                                     "GENERIC_ERROR", e)
                        else:
                            beg, end, successes, duplicates, failures, retries = es_res
                            idxctx.logger.info(
                                "done indexing (start ts: {}, end ts: {}, duration:"
                                " {:.2f}s, successes: {:d}, duplicates: {:d},"
                                " failures: {:d}, retries: {:d})",
                                tstos(beg),
                                tstos(end),
                                end - beg,
                                successes,
                                duplicates,
                                failures,
                                retries,
                            )
                            tb_res = error_code[
                                "OP_ERROR" if failures > 0 else "OK"]
                        finally:
                            if dataset:
                                try:
                                    dataset.advance(
                                        States.INDEXED if tb_res.success
                                        else States.QUARANTINED)

                                    # In case this was a re-index, remove the
                                    # REINDEX tag.
                                    Metadata.remove(dataset, Metadata.REINDEX)

                                    # Because we're on the `finally` path, we
                                    # can get here without a PbenchTarBall
                                    # object, so don't try to write an index
                                    # map if there is none.
                                    if ptb:
                                        # A pbench-index --tool-data follows a
                                        # pbench-index and generates only the
                                        # tool-specific documents: we want to
                                        # merge that with the existing document
                                        # map. On the other hand, a re-index
                                        # should replace the entire index. We
                                        # accomplish this by overwriting each
                                        # duplicate index key separately.
                                        try:
                                            meta = Metadata.get(
                                                dataset, Metadata.INDEX_MAP)
                                            map = json.loads(meta.value)
                                            map.update(ptb.index_map)
                                            meta.value = json.dumps(map)
                                            meta.update()
                                        except MetadataNotFound:
                                            Metadata.create(
                                                dataset=dataset,
                                                key=Metadata.INDEX_MAP,
                                                value=json.dumps(
                                                    ptb.index_map),
                                            )
                                        except Exception as e:
                                            idxctx.logger.exception(
                                                "Unexpected Metadata error on {}: {}",
                                                ptb.tbname,
                                                e,
                                            )
                                except DatasetTransitionError:
                                    idxctx.logger.exception(
                                        "Dataset state error: {}", ptb.tbname)
                                except DatasetError as e:
                                    idxctx.logger.exception(
                                        "Dataset error on {}: {}", ptb.tbname,
                                        e)
                                except Exception as e:
                                    idxctx.logger.exception(
                                        "Unexpected error on {}: {}",
                                        ptb.tbname, e)

                        try:
                            ie_len = ie_filepath.stat().st_size
                        except FileNotFoundError:
                            # Above operation never made it to actual indexing, ignore.
                            pass
                        except SigTermException:
                            # Re-raise a SIGTERM to avoid it being lumped in with
                            # general exception handling below.
                            raise
                        except Exception:
                            idxctx.logger.exception(
                                "Unexpected error handling"
                                " indexing errors file: {}",
                                ie_filepath,
                            )
                        else:
                            # Success fetching indexing error file size.
                            if ie_len > len(tb) + 1:
                                try:
                                    report.post_status(tstos(end), "errors",
                                                       ie_filepath)
                                except Exception:
                                    idxctx.logger.exception(
                                        "Unexpected error issuing"
                                        " report status with errors: {}",
                                        ie_filepath,
                                    )
                        finally:
                            # Unconditionally remove the indexing errors file.
                            try:
                                os.remove(ie_filepath)
                            except SigTermException:
                                # Re-raise a SIGTERM to avoid it being lumped in with
                                # general exception handling below.
                                raise
                            except Exception:
                                pass
                        # Distinguish failure cases, so we can retry the indexing
                        # easily if possible.  Different `linkerrdest` directories for
                        # different failures; the rest are going to end up in
                        # `linkerrdest` for later retry.
                        controller_path = linksrc_dir.parent

                        if tb_res is error_code["OK"]:
                            idxctx.logger.info(
                                "{}: {}/{}: success",
                                idxctx.TS,
                                controller_path.name,
                                os.path.basename(tb),
                            )
                            # Success
                            with indexed.open(mode="a") as fp:
                                print(tb, file=fp)
                            rename_tb_link(
                                tb, Path(controller_path, self.linkdest),
                                idxctx.logger)
                        elif tb_res is error_code["OP_ERROR"]:
                            idxctx.logger.warning(
                                "{}: index failures encountered on {}",
                                idxctx.TS, tb)
                            with erred.open(mode="a") as fp:
                                print(tb, file=fp)
                            rename_tb_link(
                                tb,
                                Path(controller_path, f"{self.linkerrdest}.1"),
                                idxctx.logger,
                            )
                        elif tb_res in (error_code["CFG_ERROR"],
                                        error_code["BAD_CFG"]):
                            assert False, (
                                f"Logic Bomb!  Unexpected tar ball handling "
                                f"result status {tb_res.value:d} for tar ball {tb}"
                            )
                        elif tb_res.tarball_error:
                            # Quietly skip these errors
                            with skipped.open(mode="a") as fp:
                                print(tb, file=fp)
                            rename_tb_link(
                                tb,
                                Path(
                                    controller_path,
                                    f"{self.linkerrdest}.{tb_res.value:d}",
                                ),
                                idxctx.logger,
                            )
                        else:
                            idxctx.logger.error(
                                "{}: index error {:d} encountered on {}",
                                idxctx.TS,
                                tb_res.value,
                                tb,
                            )
                            with erred.open(mode="a") as fp:
                                print(tb, file=fp)
                            rename_tb_link(
                                tb,
                                Path(controller_path, self.linkerrdest),
                                idxctx.logger,
                            )
                        idxctx.logger.info(
                            "Finished{} {} (size {:d})",
                            "[SIGQUIT]" if sigquit_interrupt[0] else "",
                            tb,
                            size,
                        )

                        if sigquit_interrupt[0]:
                            break
                        if sighup_interrupt[0]:
                            status, new_tb = self.collect_tb()
                            if status == 0:
                                if not set(new_tb).issuperset(tb_deque):
                                    idxctx.logger.info(
                                        "Tarballs supposed to be in 'TO-INDEX' are no longer present: {}",
                                        set(tb_deque).difference(new_tb),
                                    )
                                tb_deque = deque(sorted(new_tb))
                            idxctx.logger.info(
                                "SIGHUP status (Current tar ball indexed: ({}), Remaining: {}, Completed: {}, Errors_encountered: {}, Status: {})",
                                Path(tb).name,
                                len(tb_deque),
                                count_processed_tb,
                                _count_lines(erred),
                                tb_res,
                            )
                            sighup_interrupt[0] = False
                            continue
                except SigTermException:
                    idxctx.logger.exception(
                        "Indexing interrupted by SIGTERM, stop processing tarballs"
                    )
                finally:
                    # Turn off the SIGQUIT and SIGHUP handler when not indexing.
                    signal.signal(signal.SIGQUIT, signal.SIG_IGN)
                    signal.signal(signal.SIGHUP, signal.SIG_IGN)
            except SigTermException:
                # Re-raise a SIGTERM to avoid it being lumped in with general
                # exception handling below.
                raise
            except Exception:
                idxctx.logger.exception(error_code["GENERIC_ERROR"].message)
                res = error_code["GENERIC_ERROR"]
            else:
                # No exceptions while processing tar ball, success.
                res = error_code["OK"]
            finally:
                if idxctx:
                    idxctx.dump_opctx()
                idxctx.logger.debug("stopped processing list of tar balls")

                idx = _count_lines(indexed)
                skp = _count_lines(skipped)
                err = _count_lines(erred)

                idxctx.logger.info(
                    "{}.{}: indexed {:d} (skipped {:d}) results,"
                    " {:d} errors",
                    self.name,
                    idxctx.TS,
                    idx,
                    skp,
                    err,
                )

                if err > 0:
                    if skp > 0:
                        subj = (
                            f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d}"
                            f" results, w/ {err:d} errors")
                    else:
                        subj = (
                            f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, w/ {err:d}"
                            " errors")
                else:
                    if skp > 0:
                        subj = f"{self.name}.{idxctx.TS} - Indexed {idx:d} results, skipped {skp:d} results"
                    else:
                        subj = f"{self.name}.{idxctx.TS} - Indexed {idx:d} results"

                report_fname = Path(tmpdir, f"{self.name}.{idxctx.TS}.report")
                with report_fname.open(mode="w") as fp:
                    print(subj, file=fp)
                    if idx > 0:
                        print("\nIndexed Results\n===============", file=fp)
                        with indexed.open() as ifp:
                            for line in sorted(ifp):
                                print(line.strip(), file=fp)
                    if err > 0:
                        print(
                            "\nResults producing errors"
                            "\n========================",
                            file=fp,
                        )
                        with erred.open() as efp:
                            for line in sorted(efp):
                                print(line.strip(), file=fp)
                    if skp > 0:
                        print("\nSkipped Results\n===============", file=fp)
                        with skipped.open() as sfp:
                            for line in sorted(sfp):
                                print(line.strip(), file=fp)
                try:
                    report.post_status(tstos(idxctx.time()), "status",
                                       report_fname)
                except SigTermException:
                    # Re-raise a SIGTERM to avoid it being lumped in with general
                    # exception handling below.
                    raise
                except Exception:
                    pass

        return res.value
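
The method above relies on error_code entries exposing success, value, message, and tarball_error. A sketch of one way such entries could be modeled; this is an assumption about shape only, with illustrative names and values, not the indexer's actual table:

from dataclasses import dataclass

# Assumed shape only: each entry carries the attributes consulted above
# (.success, .value, .message, .tarball_error).
@dataclass(frozen=True)
class ErrorCode:
    value: int
    message: str
    success: bool = False
    tarball_error: bool = False

error_code_sketch = {
    "OK": ErrorCode(0, "success", success=True),
    "OP_ERROR": ErrorCode(1, "operational error"),
    "GENERIC_ERROR": ErrorCode(12, "generic error"),
}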
Example No. 22
 def test_construct_bad_owner(self):
     """Test with a non-existent username
     """
     with pytest.raises(DatasetBadParameterType):
         Dataset(owner="notme", controller="frodo", name="fio")
Example No. 23
    def put(self, filename: str):
        try:
            username = Auth.token_auth.current_user().username
        except Exception as exc:
            self.logger.error("Error verifying the username: '{}'", exc)
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

        if os.path.basename(filename) != filename:
            msg = "File must not contain a path"
            self.logger.warning(
                "{} for user = {}, file = {!a}",
                msg,
                username,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        if not self.supported_file_extension(filename):
            msg = f"File extension not supported, must be {self.ALLOWED_EXTENSION}"
            self.logger.warning(
                "{} for user = {}, file = {!a}",
                msg,
                username,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        controller = request.headers.get("controller")
        if not controller:
            msg = "Missing required controller header"
            self.logger.warning("{} for user = {}, file = {!a}", msg, username,
                                filename)
            abort(HTTPStatus.BAD_REQUEST, message=msg)
        if validate_hostname(controller) != 0:
            msg = "Invalid controller header"
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        md5sum = request.headers.get("Content-MD5")
        if not md5sum:
            msg = "Missing required Content-MD5 header"
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        status = HTTPStatus.OK
        try:
            content_length = int(request.headers["Content-Length"])
        except KeyError:
            msg = "Missing required Content-Length header"
            status = HTTPStatus.LENGTH_REQUIRED
        except ValueError:
            msg = f"Invalid Content-Length header, not an integer ({content_length})"
            status = HTTPStatus.BAD_REQUEST
        else:
            if not (0 < content_length <= self.max_content_length):
                msg = "Content-Length ({}) must be greater than 0 and no greater than {}".format(
                    content_length,
                    humanize.naturalsize(self.max_content_length))
                status = (HTTPStatus.REQUEST_ENTITY_TOO_LARGE
                          if 0 < content_length else HTTPStatus.BAD_REQUEST)
        if status != HTTPStatus.OK:
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(status, message=msg)

        path = self.upload_directory / controller
        path.mkdir(exist_ok=True)
        tar_full_path = Path(path, filename)
        md5_full_path = Path(path, f"{filename}.md5")
        bytes_received = 0

        # Create a tracking dataset object; it'll begin in UPLOADING state
        try:
            dataset = Dataset(owner=username,
                              controller=controller,
                              path=tar_full_path,
                              md5=md5sum)
            dataset.add()
        except DatasetDuplicate:
            self.logger.info(
                "Dataset already exists, user = {}, ctrl = {!a}, file = {!a}",
                username,
                controller,
                filename,
            )
            response = jsonify(dict(message="Dataset already exists"))
            response.status_code = HTTPStatus.OK
            return response
        except Exception as exc:
            self.logger.error(
                "unable to create dataset, '{}', for user = {}, ctrl = {!a}, file = {!a}",
                exc,
                username,
                controller,
                filename,
            )
            abort(
                HTTPStatus.INTERNAL_SERVER_ERROR,
                message="INTERNAL ERROR",
            )

        if tar_full_path.is_file() or md5_full_path.is_file():
            self.logger.error(
                "Dataset, or corresponding md5 file, already present; tar {} ({}), md5 {} ({})",
                tar_full_path,
                "present" if tar_full_path.is_file() else "missing",
                md5_full_path,
                "present" if md5_full_path.is_file() else "missing",
            )
            abort(
                HTTPStatus.INTERNAL_SERVER_ERROR,
                message="INTERNAL ERROR",
            )

        self.logger.info(
            "Uploading file {!a} (user = {}, ctrl = {!a}) to {}",
            filename,
            username,
            controller,
            dataset,
        )

        with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
            hash_md5 = hashlib.md5()

            try:
                while True:
                    chunk = request.stream.read(self.CHUNK_SIZE)
                    bytes_received += len(chunk)
                    if len(chunk) == 0 or bytes_received > content_length:
                        break

                    ofp.write(chunk)
                    hash_md5.update(chunk)
            except OSError as exc:
                if exc.errno == errno.ENOSPC:
                    self.logger.error(
                        "Not enough space on volume, {}, for upload:"
                        " user = {}, ctrl = {!a}, file = {!a}",
                        path,
                        username,
                        controller,
                        filename,
                    )
                    abort(HTTPStatus.INSUFFICIENT_STORAGE)
                else:
                    msg = "Unexpected error encountered during file upload"
                    self.logger.error(
                        "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                        msg,
                        exc,
                        username,
                        controller,
                        filename,
                    )
                    abort(HTTPStatus.INTERNAL_SERVER_ERROR,
                          message="INTERNAL ERROR")
            except Exception as exc:
                msg = "Unexpected error encountered during file upload"
                self.logger.error(
                    "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                    msg,
                    exc,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.INTERNAL_SERVER_ERROR,
                      message="INTERNAL ERROR")

            if bytes_received != content_length:
                msg = (
                    "Bytes received do not match Content-Length header"
                    f" (expected {content_length}; received {bytes_received})")
                self.logger.warning(
                    "{} for user = {}, ctrl = {!a}, file = {!a}",
                    msg,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.BAD_REQUEST, message=msg)
            elif hash_md5.hexdigest() != md5sum:
                msg = ("MD5 checksum does not match Content-MD5 header"
                       f" ({hash_md5.hexdigest()} != {md5sum})")
                self.logger.warning(
                    "{} for user = {}, ctrl = {!a}, file = {!a}",
                    msg,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.BAD_REQUEST, message=msg)

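            # Write the small .md5 file first; if writing it (or the later
            # link of the tarball) fails, the .md5 is removed again so the
            # pair never appears half-complete on disk.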
            # First write the .md5
            try:
                md5_full_path.write_text(f"{md5sum} {filename}\n")
            except Exception as exc:
                try:
                    md5_full_path.unlink(missing_ok=True)
                except Exception as md5_exc:
                    self.logger.error(
                        "Failed to remove .md5 {} when trying to clean up: '{}'",
                        md5_full_path,
                        md5_exc,
                    )
                self.logger.error("Failed to write .md5 file, '{}': '{}'",
                                  md5_full_path, exc)
                abort(HTTPStatus.INTERNAL_SERVER_ERROR,
                      message="INTERNAL ERROR")

            # Then create the final filename link to the temporary file.
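            # The NamedTemporaryFile's own name disappears when the context
            # manager exits; hard-linking it to the final tarball name keeps
            # the fully written data without copying it again.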
            try:
                os.link(ofp.name, tar_full_path)
            except Exception as exc:
                try:
                    md5_full_path.unlink()
                except Exception as md5_exc:
                    self.logger.error(
                        "Failed to remove .md5 {} when trying to clean up: {}",
                        md5_full_path,
                        md5_exc,
                    )
                self.logger.error(
                    "Failed to link tar ball '{}' to '{}': '{}'",
                    ofp.name,
                    tar_full_path,
                    exc,
                )
                abort(HTTPStatus.INTERNAL_SERVER_ERROR,
                      message="INTERNAL ERROR")

        try:
            dataset.advance(States.UPLOADED)
        except Exception as exc:
            self.logger.error("Unable to finalize {}, '{}'", dataset, exc)
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")
        response = jsonify(dict(message="File successfully uploaded"))
        response.status_code = HTTPStatus.CREATED
        return response
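
For context, a client-side sketch of the upload this handler expects might look like the following. The endpoint URL and the "controller" header below are assumptions (the route isn't shown in this excerpt); the Content-MD5 header and the body length are what the handler actually validates.

# Hypothetical client for the PUT handler above; the URL layout and the
# "controller" header name are assumptions, not the server's documented API.
import hashlib
from pathlib import Path

import requests


def put_tarball(server: str, controller: str, tarball: Path) -> requests.Response:
    data = tarball.read_bytes()
    md5 = hashlib.md5(data).hexdigest()
    return requests.put(
        f"{server}/api/v1/upload/{tarball.name}",  # assumed route
        data=data,  # requests derives Content-Length from the body
        headers={"Content-MD5": md5, "controller": controller},
    )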
Ejemplo n.º 24
0
def process_tb(config, logger, receive_dir, qdir_md5, duplicates, errors):
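    """Scan the receive directory tree for fully uploaded tarballs, verify
    each one against its .md5 file, and move the verified tarball and its
    .md5 into the archive tree, linking the tarball into the controller's
    TODO directory for further processing.

    Returns a Results tuple summarizing processed, quarantined, duplicate,
    and failed tarballs.
    """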

    # Check for results that are ready for processing: version 002 agents
    # upload the MD5 file as xxx.md5.check and rename it to xxx.md5 once
    # their own MD5 check has passed, so a *.tar.xz.md5 file is what marks
    # a tarball as ready for us.
    list_check = glob.glob(
        os.path.join(receive_dir, "**", "*.tar.xz.md5"), recursive=True
    )

    archive = config.ARCHIVE
    logger.info("{}", config.TS)
    list_check.sort()
    nstatus = ""

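    # Counters: tarballs seen, successfully processed, errored, quarantined
    # (failed MD5 check), and duplicates already present in the archive.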
    ntotal = ntbs = nerrs = nquarantined = ndups = 0

    for tbmd5 in list_check:
        ntotal += 1

        # full pathname of tarball
        tb = Path(tbmd5[0:-4])
        tbmd5 = Path(tbmd5)

        # directory
        tbdir = tb.parent

        # resultname: the tarball's basename (e.g., foo.tar.xz)
        resultname = tb.name

        controller = tbdir.name
        dest = archive / controller

        # Create a new dataset tracker in UPLOADING state, and add it to the
        # database.
        #
        # NOTE: Technically, this particular workflow has no real "UPLOADING"
        # phase, as the `pbench-server-prep-shim-002` command isn't invoked
        # until the tarball and its MD5 have been entirely uploaded by the
        # agent via `ssh`; however, that transport can't be supported once we
        # have authorized user ownership, and the model fits the server `PUT`
        # method, where an unexpected termination could leave a tarball in
        # "UPLOADING" state.
        #
        # TODO: We have no way to identify an owner here, so assign the
        # dataset to the arbitrary "pbench" user. This will go away when we
        # drop this component entirely in favor of PUT.
        try:
            dataset = Dataset.create(
                controller=controller, path=resultname, owner="pbench"
            )
        except DatasetError as e:
            logger.error(
                "Unable to create dataset {}>{}: {}", controller, resultname, str(e)
            )
            # TODO: Should we quarantine over this? Note it's not quite
            # straightforward, as quarantine() expects that the Dataset has
            # been created, so we'll get a cascade failure. Since prep-shim's
            # days are numbered, I'm inclined not to worry about it here.
            dataset = None

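        # If both the tarball and its .md5 already exist in the archive, treat
        # this copy as a duplicate and quarantine it.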
        if all([(dest / resultname).is_file(), (dest / tbmd5.name).is_file()]):
            logger.error("{}: Duplicate: {} duplicate name", config.TS, tb)
            quarantine((duplicates / controller), logger, tb, tbmd5)
            ndups += 1
            continue

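        # md5_check() presumably returns the checksum computed from the
        # tarball alongside the value recorded in its .md5 file; a mismatch
        # (or either value missing) quarantines the pair.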
        archive_tar_hex_value, archive_md5_hex_value = md5_check(tb, tbmd5, logger)
        if any(
            [
                archive_tar_hex_value != archive_md5_hex_value,
                archive_tar_hex_value is None,
                archive_md5_hex_value is None,
            ]
        ):
            logger.error("{}: Quarantined: {} failed MD5 check", config.TS, tb)
            logger.info("{}: FAILED", tb.name)
            logger.info("md5sum: WARNING: 1 computed checksum did NOT match")
            quarantine((qdir_md5 / controller), logger, tb, tbmd5)
            nquarantined += 1
            continue

        if dataset:
            try:
                dataset.md5 = archive_md5_hex_value
                dataset.update()
            except DatasetError as e:
                logger.warning(
                    "Unable to update dataset {} with md5: {}", str(dataset), str(e)
                )

        # make the destination directory and its TODO subdir if necessary.
        try:
            os.makedirs(dest / "TODO")
        except FileExistsError:
            # directory already exists, ignore
            pass
        except Exception:
            logger.error("{}: Error in creating TODO directory.", config.TS)
            quarantine(os.path.join(errors, controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # First, copy the small .md5 file to the destination. That way, if
        # that operation fails it will fail quickly since the file is small.
        try:
            shutil.copy2(tbmd5, dest)
        except Exception:
            logger.error(
                "{}: Error in copying .md5 file to Destination path.", config.TS
            )
            try:
                os.remove(dest / tbmd5.name)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of copy failure failed itself.", config.TS
                )
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Next, mv the "large" tar ball to the destination. If the destination
        # is on the same device, the move should be quick. If the destination is
        # on a different device, the move will be a copy and delete, and will
        # take a bit longer.  If it fails, the file will NOT be at the
        # destination.
        try:
            shutil.move(str(tb), str(dest))
        except Exception:
            logger.error(
                "{}: Error in moving tarball file to Destination path.", config.TS
            )
            try:
                os.remove(dest / resultname)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of copy failure failed itself.", config.TS
                )
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Restore the SELinux context properly
        try:
            selinux.restorecon(dest / tb.name)
            selinux.restorecon(dest / tbmd5.name)
        except Exception as e:
            # log it but do not abort
            logger.error("{}: Error: 'restorecon {}', {}", config.TS, dest / tb.name, e)

        # Now that we have successfully moved the tar ball and its .md5 to the
        # destination, we can remove the original .md5 file.
        try:
            os.remove(tbmd5)
        except Exception as exc:
            logger.error(
                "{}: Warning: cleanup of successful copy operation failed: '{}'",
                config.TS,
                exc,
            )

        try:
            os.symlink((dest / resultname), (dest / "TODO" / resultname))
        except Exception as exc:
            logger.error("{}: Error in creation of symlink. '{}'", config.TS, exc)
            # if we fail to make the link, we quarantine the (already moved)
            # tarball and .md5.
            quarantine(
                (errors / controller), logger, (dest / tb.name), (dest / tbmd5.name),
            )
            nerrs += 1
            continue

        ntbs += 1

        try:
            if dataset:
                dataset.advance(States.UPLOADED)
        except Exception:
            logger.exception("Unable to finalize {}", dataset)

        nstatus = f"{nstatus}{config.TS}: processed {tb}\n"
        logger.info(f"{tb.name}: OK")

    return Results(
        nstatus=nstatus,
        ntotal=ntotal,
        ntbs=ntbs,
        nquarantined=nquarantined,
        ndups=ndups,
        nerrs=nerrs,
    )
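
The Results container and the quarantine() helper used above are defined elsewhere in the server sources; a minimal sketch, assuming Results is a plain named tuple and quarantine() simply moves the offending files into the given directory, might look like this:

# Sketch only: field names and the quarantine() signature are inferred from
# how process_tb() uses them, not copied from the real implementation.
import os
import shutil
from collections import namedtuple

Results = namedtuple(
    "Results", ["nstatus", "ntotal", "ntbs", "nquarantined", "ndups", "nerrs"]
)


def quarantine(dest, logger, *files):
    """Move the given files into the quarantine directory `dest`."""
    os.makedirs(dest, exist_ok=True)
    for f in files:
        try:
            shutil.move(str(f), str(dest))
        except Exception as exc:
            logger.error("quarantine of {} into {} failed: '{}'", f, dest, exc)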