Example #1
    def process(self, link: str, state: States) -> int:
        """
        process Create Dataset records for pre-existing server tarballs found
        in a specified filesystem "state" (the link directory in the archive
        tree), setting each new Dataset to a specified Dataset state.

        Each tarball for which a Dataset record already exists is IGNORED,
        and we don't attempt to advance the state.

        Args:
            :link (str):        Filesystem "state" link directory
                                (e.g., TO-INDEX)
            :state (States):    A state enum value representing desired Dataset
                                state.

        Returns:
            int: Status (0 success, 1 failure)
        """
        logger = self.logger
        done = 0
        fail = 0
        ignore = 0
        args = {}
        owner = User.validate_user(self.options.user)

        for tarball in self._collect_tb(link):
            if self.options.verify:
                print(f"Processing {tarball} from {link} -> state {state}")
            try:
                args["path"] = tarball
                try:
                    dataset = Dataset.attach(**args)
                    if self.options.verify:
                        print(f"Found existing {dataset}: {dataset.state}")
                    ignore += 1
                except DatasetNotFound:
                    a = args.copy()
                    a["md5"] = open(f"{tarball}.md5").read().split()[0]

                    # NOTE: including "state" on attach above would attempt to
                    # advance the dataset's state, which we don't want for
                    # import, so we add it only here. "owner" would be ignored
                    # by attach, but we add it here anyway for clarity.
                    a["state"] = state
                    a["owner"] = owner
                    dataset = Dataset.create(**a)
                    print(f"Imported {dataset}: {state}")
                    done += 1
            except Exception as e:
                # Stringify any exception and report it; then fail
                logger.exception("Import of dataset {} failed", tarball)
                print(f"{_NAME_}: dataset {tarball} failed with {e}", file=sys.stderr)
                fail += 1
        print(
            f"Imported {done} datasets from {link} with {fail} errors and {ignore} ignored"
        )
        return 1 if fail > 0 else 0
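
A hedged sketch of how process() might be driven for several link directories; the process_all name, the links mapping, and the States member used are illustrative assumptions, not part of the original code.
    def process_all(self) -> int:
        # Hypothetical mapping of filesystem "state" link directories to the
        # Dataset state each one represents (the pairs shown are assumptions).
        links = {"TO-INDEX": States.UPLOADED}
        status = 0
        for link, state in links.items():
            # process() returns 0 on success, 1 on failure; report overall
            # failure if any link directory fails.
            status |= self.process(link, state)
        return status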
Example #2
    def test_metadata_remove(self):
        """ Test that we can remove a Metadata key
        """
        ds = Dataset.create(owner="user1",
                            controller="controller",
                            name="other")
        assert ds.metadatas == []
        m = Metadata(key=Metadata.ARCHIVED, value="TRUE")
        m.add(ds)
        assert ds.metadatas == [m]

        Metadata.remove(ds, Metadata.ARCHIVED)
        assert ds.metadatas == []
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.ARCHIVED)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.ARCHIVED

        # Removing a key that isn't present is a no-op
        Metadata.remove(ds, Metadata.REINDEX)
        assert ds.metadatas == []
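
A plausible sketch of Metadata.remove() consistent with what this test asserts: removing a key that is not present is a no-op rather than an error. The body is an assumption built from Metadata.get() and delete() as used in these tests, not the project's actual implementation.
    @staticmethod
    def remove(dataset: "Dataset", key: str) -> None:
        # Sketch only: would live on the Metadata class.
        try:
            meta = Metadata.get(dataset, key)  # raises MetadataNotFound if absent
        except MetadataNotFound:
            return  # absent key: silently do nothing, as the test expects
        meta.delete()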
Example #3
def process_tb(config, logger, receive_dir, qdir_md5, duplicates, errors):

    # Check for results that are ready for processing: version 002 agents
    # upload the MD5 file as xxx.md5.check and rename it to xxx.md5 once
    # their MD5 verification is complete, so completed .md5 files are what
    # we look for here.
    list_check = glob.glob(os.path.join(receive_dir, "**", "*.tar.xz.md5"),
                           recursive=True)

    archive = config.ARCHIVE
    logger.info("{}", config.TS)
    list_check.sort()
    nstatus = ""

    ntotal = ntbs = nerrs = nquarantined = ndups = 0

    for tbmd5 in list_check:
        ntotal += 1

        # full pathname of tarball (strip the trailing ".md5" from the path)
        tb = Path(tbmd5[0:-4])
        tbmd5 = Path(tbmd5)

        # directory containing the tarball (named for the controller)
        tbdir = tb.parent

        # resultname: the tarball's basename, e.g. foo.tar.xz
        resultname = tb.name

        controller = tbdir.name
        dest = archive / controller

        # Create a new dataset tracker in UPLOADING state, and add it to the
        # database.
        #
        # NOTE: Technically, this particular workflow has no "UPLOADING" as
        # the `pbench-server-prep-shim-002` command isn't invoked until the
        # tarball and MD5 has been entirely uploaded by the agent via `ssh`;
        # this method however can't be supported once we have authorized user
        # ownership, and the model fits the server `PUT` method where an
        # unexpected termination could leave a tarball in "Uploading" state.
        #
        # TODO: We have no way to identify an owner here, so assign it to
        # the arbitrary "pbench" user. This will go away when we drop this
        # component entirely in favor of PUT.
        try:
            dataset = Dataset.create(controller=controller,
                                     path=resultname,
                                     owner="pbench")
        except DatasetError as e:
            logger.error("Unable to create dataset {}>{}: {}", controller,
                         resultname, str(e))
            # TODO: Should we quarantine over this? Note it's not quite
            # straightforward, as quarantine() expects that the Dataset has
            # been created, so we'll get a cascade failure. Since prep-shim's
            # days are numbered, I'm inclined not to worry about it here.
            dataset = None

        if all([(dest / resultname).is_file(), (dest / tbmd5.name).is_file()]):
            logger.error("{}: Duplicate: {} duplicate name", config.TS, tb)
            quarantine((duplicates / controller), logger, tb, tbmd5)
            ndups += 1
            continue

        archive_tar_hex_value, archive_md5_hex_value = md5_check(
            tb, tbmd5, logger)
        if any([
                archive_tar_hex_value != archive_md5_hex_value,
                archive_tar_hex_value is None,
                archive_md5_hex_value is None,
        ]):
            logger.error("{}: Quarantined: {} failed MD5 check", config.TS, tb)
            logger.info("{}: FAILED", tb.name)
            logger.info("md5sum: WARNING: 1 computed checksum did NOT match")
            quarantine((qdir_md5 / controller), logger, tb, tbmd5)
            nquarantined += 1
            continue

        if dataset:
            try:
                dataset.md5 = archive_md5_hex_value
                dataset.update()
            except DatasetError as e:
                logger.warning(
                    "Unable to update dataset {} with md5: {}", str(dataset), str(e)
                )

        # make the destination directory and its TODO subdir if necessary.
        try:
            os.makedirs(dest / "TODO")
        except FileExistsError:
            # directory already exists, ignore
            pass
        except Exception:
            logger.error("{}: Error in creating TODO directory.", config.TS)
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # First, copy the small .md5 file to the destination. That way, if
        # that operation fails it will fail quickly since the file is small.
        try:
            shutil.copy2(tbmd5, dest)
        except Exception:
            logger.error("{}: Error in copying .md5 file to Destination path.",
                         config.TS)
            try:
                os.remove(dest / tbmd5.name)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of copy failure failed itself.",
                    config.TS)
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Next, mv the "large" tar ball to the destination. If the destination
        # is on the same device, the move should be quick. If the destination is
        # on a different device, the move will be a copy and delete, and will
        # take a bit longer.  If it fails, the file will NOT be at the
        # destination.
        try:
            shutil.move(str(tb), str(dest))
        except Exception:
            logger.error(
                "{}: Error moving tarball to destination path.",
                config.TS)
            try:
                os.remove(dest / resultname)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of move failure failed itself.",
                    config.TS)
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Restore the SELinux context properly
        try:
            selinux.restorecon(dest / tb.name)
            selinux.restorecon(dest / tbmd5.name)
        except Exception as e:
            # log it but do not abort
            logger.error("{}: Error: 'restorecon {}', {}", config.TS,
                         dest / tb.name, e)

        # Now that we have successfully moved the tar ball and its .md5 to the
        # destination, we can remove the original .md5 file.
        try:
            os.remove(tbmd5)
        except Exception as exc:
            logger.error(
                "{}: Warning: cleanup of successful copy operation failed: '{}'",
                config.TS,
                exc,
            )

        try:
            os.symlink((dest / resultname), (dest / "TODO" / resultname))
        except Exception as exc:
            logger.error("{}: Error in creation of symlink. '{}'", config.TS,
                         exc)
            # if we fail to make the link, we quarantine the (already moved)
            # tarball and .md5.
            quarantine(
                (errors / controller),
                logger,
                (dest / tb.name),
                (dest / tbmd5.name),
            )
            nerrs += 1
            continue

        ntbs += 1

        try:
            if dataset:
                dataset.advance(States.UPLOADED)
        except Exception:
            logger.exception("Unable to finalize {}", dataset)

        nstatus = f"{nstatus}{config.TS}: processed {tb}\n"
        logger.info(f"{tb.name}: OK")

    return Results(
        nstatus=nstatus,
        ntotal=ntotal,
        ntbs=ntbs,
        nquarantined=nquarantined,
        ndups=ndups,
        nerrs=nerrs,
    )
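
The function above relies on a Results container and a quarantine() helper defined elsewhere in the module. A minimal sketch of plausible shapes for both, purely as illustrative assumptions (the real definitions may differ):
import os
import shutil
from collections import namedtuple

# Field names match the keyword arguments used in the return statement above.
Results = namedtuple(
    "Results", ["nstatus", "ntotal", "ntbs", "nquarantined", "ndups", "nerrs"]
)


def quarantine(dest, logger, *files):
    """Move the given files into the quarantine directory `dest`, creating it
    if necessary; errors are only logged, since quarantine is itself the
    failure path (assumed behavior, sketch only)."""
    os.makedirs(dest, exist_ok=True)
    for afile in files:
        try:
            shutil.move(str(afile), str(dest))
        except Exception:
            logger.exception("quarantine of {} into {} failed", afile, dest)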
Example #4
    def test_metadata(self):
        """ Various tests on Metadata keys
        """
        # See if we can create a metadata row
        ds = Dataset.create(owner="redhat",
                            controller="controller",
                            name="name")
        assert ds.metadatas == []
        m = Metadata.create(key=Metadata.REINDEX, value="TRUE", dataset=ds)
        assert m is not None
        assert ds.metadatas == [m]

        # Try to get it back
        m1 = Metadata.get(ds, Metadata.REINDEX)
        assert m1.key == m.key
        assert m1.value == m.value
        assert m.id == m1.id
        assert m.dataset_ref == m1.dataset_ref

        # Check the str()
        assert "redhat|controller|name>>REINDEX" == str(m)

        # Try to get a metadata key that doesn't exist
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.TARBALL_PATH)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.TARBALL_PATH

        # Try to remove a metadata key that doesn't exist (No-op)
        Metadata.remove(ds, Metadata.TARBALL_PATH)

        # Try to create a metadata with a bad key
        badkey = "THISISNOTTHEKEYYOURELOOKINGFOR"
        with pytest.raises(MetadataBadKey) as exc:
            Metadata(key=badkey, value=None)
        assert exc.value.key == badkey

        # Try to create a key without a value
        with pytest.raises(MetadataMissingKeyValue):
            Metadata(key=Metadata.REINDEX)

        # Try to add a duplicate metadata key
        with pytest.raises(MetadataDuplicateKey) as exc:
            m1 = Metadata(key=Metadata.REINDEX, value="IRRELEVANT")
            m1.add(ds)
        assert exc.value.key == Metadata.REINDEX
        assert exc.value.dataset == ds
        assert ds.metadatas == [m]

        # Try to add a Metadata key to something that's not a dataset
        with pytest.raises(DatasetBadParameterType) as exc:
            m1 = Metadata(key=Metadata.TARBALL_PATH, value="DONTCARE")
            m1.add("foobar")
        assert exc.value.bad_value == "foobar"
        assert exc.value.expected_type == Dataset

        # Try to create a Metadata with a bad value for the dataset
        with pytest.raises(DatasetBadParameterType) as exc:
            m1 = Metadata.create(key=Metadata.REINDEX,
                                 value="TRUE",
                                 dataset=[ds])
        assert exc.value.bad_value == [ds]
        assert exc.value.expected_type == Dataset

        # Try to update the metadata key
        m.value = "False"
        m.update()
        m1 = Metadata.get(ds, Metadata.REINDEX)
        assert m.id == m1.id
        assert m.dataset_ref == m1.dataset_ref
        assert m.key == m1.key
        assert m.value == "False"

        # Delete the key and make sure it's gone
        m.delete()
        with pytest.raises(MetadataNotFound) as exc:
            Metadata.get(ds, Metadata.REINDEX)
        assert exc.value.dataset == ds
        assert exc.value.key == Metadata.REINDEX
        assert ds.metadatas == []
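
A minimal sketch consistent with the str() assertion above ("redhat|controller|name>>REINDEX"): the Metadata string form appears to compose the owning dataset's own string form with the metadata key. The `dataset` relationship attribute is an assumption (only `dataset_ref` appears in these tests), and str(Dataset) is assumed to render as "owner|controller|name".
    def __str__(self) -> str:
        # Sketch only: would live on the Metadata class; `self.dataset` is an
        # assumed relationship back to the owning Dataset.
        return f"{self.dataset}>>{self.key}"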