def process(self, link: str, state: States) -> int:
    """
    Create Dataset records for pre-existing server tarballs that are in a
    specified filesystem "state" (the link directory in the archive tree),
    in a specified Dataset state.

    Each tarball for which a Dataset record already exists is IGNORED, and
    we don't attempt to advance the state.

    Args:
        link (str): Filesystem "state" link directory (e.g., TO-INDEX)
        state (States): A state enum value representing the desired Dataset
            state

    Returns:
        int: Status (0 success, 1 failure)
    """
    logger = self.logger
    done = 0
    fail = 0
    ignore = 0
    args = {}
    owner = User.validate_user(self.options.user)

    for tarball in self._collect_tb(link):
        if self.options.verify:
            print(f"Processing {tarball} from {link} -> state {state}")
        try:
            args["path"] = tarball
            try:
                dataset = Dataset.attach(**args)
                if self.options.verify:
                    print(f"Found existing {dataset}: {dataset.state}")
                ignore = ignore + 1
            except DatasetNotFound:
                a = args.copy()
                with open(f"{tarball}.md5") as md5_file:
                    a["md5"] = md5_file.read().split()[0]

                # NOTE: including "state" on attach above would attempt to
                # advance the dataset's state, which we don't want for
                # import, so we add it only here. "owner" would be ignored
                # by attach, but we add it here anyway for clarity.
                a["state"] = state
                a["owner"] = owner

                dataset = Dataset.create(**a)
                print(f"Imported {dataset}: {state}")
                done = done + 1
        except Exception as e:
            # Stringify any exception and report it; then fail
            logger.exception("Import of dataset {} failed", tarball)
            print(f"{_NAME_}: dataset {tarball} failed with {e}", file=sys.stderr)
            fail = fail + 1
    print(
        f"Imported {done} datasets from {link} with {fail} errors and {ignore} ignored"
    )
    return 1 if fail > 0 else 0
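# Illustrative sketch only (assumption): the real _collect_tb used by
# process() above is defined elsewhere in this class. One plausible shape is
# a generator that walks each controller directory in the archive tree,
# follows the requested "state" link directory (e.g., TO-INDEX), and yields
# the tarball paths its symlinks point at. The "archive" option attribute is
# hypothetical, and pathlib.Path is assumed to be imported.
def _collect_tb_sketch(self, link: str):
    archive = Path(self.options.archive)  # hypothetical option attribute
    for controller in sorted(p for p in archive.iterdir() if p.is_dir()):
        link_dir = controller / link
        if not link_dir.is_dir():
            continue
        for entry in sorted(link_dir.glob("*.tar.xz")):
            # Link directory entries are symlinks to the real tarballs in
            # the controller directory; resolve them before yielding.
            yield str(entry.resolve())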
def test_metadata_remove(self, db_session, create_user):
    """
    Test that we can remove a Metadata key
    """
    ds = Dataset.create(owner=create_user.username, controller="frodo", name="fio")
    assert ds.metadatas == []

    m = Metadata(key=Metadata.ARCHIVED, value="TRUE")
    m.add(ds)
    assert ds.metadatas == [m]

    Metadata.remove(ds, Metadata.ARCHIVED)
    assert ds.metadatas == []
    with pytest.raises(MetadataNotFound) as exc:
        Metadata.get(ds, Metadata.ARCHIVED)
    assert exc.value.dataset == ds
    assert exc.value.key == Metadata.ARCHIVED

    Metadata.remove(ds, Metadata.REINDEX)
    assert ds.metadatas == []
def test_metadata(self, db_session, create_user):
    """
    Various tests on Metadata keys
    """
    # See if we can create a metadata row
    ds = Dataset.create(owner=create_user.username, controller="frodo", name="fio")
    assert ds.metadatas == []
    m = Metadata.create(key=Metadata.REINDEX, value="TRUE", dataset=ds)
    assert m is not None
    assert ds.metadatas == [m]

    # Try to get it back
    m1 = Metadata.get(ds, Metadata.REINDEX)
    assert m1.key == m.key
    assert m1.value == m.value
    assert m.id == m1.id
    assert m.dataset_ref == m1.dataset_ref

    # Check the str()
    assert "test(1)|frodo|fio>>REINDEX" == str(m)

    # Try to get a metadata key that doesn't exist
    with pytest.raises(MetadataNotFound) as exc:
        Metadata.get(ds, Metadata.TARBALL_PATH)
    assert exc.value.dataset == ds
    assert exc.value.key == Metadata.TARBALL_PATH

    # Try to remove a metadata key that doesn't exist (No-op)
    Metadata.remove(ds, Metadata.TARBALL_PATH)

    # Try to create a metadata with a bad key
    badkey = "THISISNOTTHEKEYYOURELOOKINGFOR"
    with pytest.raises(MetadataBadKey) as exc:
        Metadata(key=badkey, value=None)
    assert exc.value.key == badkey

    # Try to create a key without a value
    with pytest.raises(MetadataMissingKeyValue):
        Metadata(key=Metadata.REINDEX)

    # Try to add a duplicate metadata key
    with pytest.raises(MetadataDuplicateKey) as exc:
        m1 = Metadata(key=Metadata.REINDEX, value="IRRELEVANT")
        m1.add(ds)
    assert exc.value.key == Metadata.REINDEX
    assert exc.value.dataset == ds
    assert ds.metadatas == [m]

    # Try to add a Metadata key to something that's not a dataset
    with pytest.raises(DatasetBadParameterType) as exc:
        m1 = Metadata(key=Metadata.TARBALL_PATH, value="DONTCARE")
        m1.add("foobar")
    assert exc.value.bad_value == "foobar"
    assert exc.value.expected_type == Dataset.__name__

    # Try to create a Metadata with a bad value for the dataset
    with pytest.raises(DatasetBadParameterType) as exc:
        m1 = Metadata.create(key=Metadata.REINDEX, value="TRUE", dataset=[ds])
    assert exc.value.bad_value == [ds]
    assert exc.value.expected_type == Dataset.__name__

    # Try to update the metadata key
    m.value = "False"
    m.update()
    m1 = Metadata.get(ds, Metadata.REINDEX)
    assert m.id == m1.id
    assert m.dataset_ref == m1.dataset_ref
    assert m.key == m1.key
    assert m.value == "False"

    # Delete the key and make sure it's gone
    m.delete()
    with pytest.raises(MetadataNotFound) as exc:
        Metadata.get(ds, Metadata.REINDEX)
    assert exc.value.dataset == ds
    assert exc.value.key == Metadata.REINDEX
    assert ds.metadatas == []
def process_tb(config, logger, receive_dir, qdir_md5, duplicates, errors):
    # Check for results that are ready for processing: version 002 agents
    # upload the MD5 file as xxx.md5.check and rename it to xxx.md5 after
    # they are done with MD5 checking, so that's what we look for.
    list_check = glob.glob(
        os.path.join(receive_dir, "**", "*.tar.xz.md5"), recursive=True
    )

    archive = config.ARCHIVE
    logger.info("{}", config.TS)
    list_check.sort()
    nstatus = ""

    ntotal = ntbs = nerrs = nquarantined = ndups = 0

    for tbmd5 in list_check:
        ntotal += 1

        # full pathname of tarball
        tb = Path(tbmd5[0:-4])
        tbmd5 = Path(tbmd5)

        # directory containing the tarball
        tbdir = tb.parent

        # resultname: the tarball file name (e.g., foo.tar.xz)
        resultname = tb.name

        controller = tbdir.name
        dest = archive / controller

        # Create a new dataset tracker in UPLOADING state, and add it to the
        # database.
        #
        # NOTE: Technically, this particular workflow has no "UPLOADING" as
        # the `pbench-server-prep-shim-002` command isn't invoked until the
        # tarball and MD5 have been entirely uploaded by the agent via `ssh`;
        # this method however can't be supported once we have authorized user
        # ownership, and the model fits the server `PUT` method where an
        # unexpected termination could leave a tarball in "Uploading" state.
        #
        # TODO: We have no way to identify an owner here, so assign it to
        # the arbitrary "pbench" user. This will go away when we drop this
        # component entirely in favor of PUT.
        try:
            dataset = Dataset.create(
                controller=controller, path=resultname, owner="pbench"
            )
        except DatasetError as e:
            logger.error(
                "Unable to create dataset {}>{}: {}", controller, resultname, str(e)
            )
            # TODO: Should we quarantine over this? Note it's not quite
            # straightforward, as quarantine() expects that the Dataset has
            # been created, so we'll get a cascade failure. Since prep-shim's
            # days are numbered, I'm inclined not to worry about it here.
            dataset = None

        if all([(dest / resultname).is_file(), (dest / tbmd5.name).is_file()]):
            logger.error("{}: Duplicate: {} duplicate name", config.TS, tb)
            quarantine((duplicates / controller), logger, tb, tbmd5)
            ndups += 1
            continue

        archive_tar_hex_value, archive_md5_hex_value = md5_check(tb, tbmd5, logger)
        if any(
            [
                archive_tar_hex_value != archive_md5_hex_value,
                archive_tar_hex_value is None,
                archive_md5_hex_value is None,
            ]
        ):
            logger.error("{}: Quarantined: {} failed MD5 check", config.TS, tb)
            logger.info("{}: FAILED", tb.name)
            logger.info("md5sum: WARNING: 1 computed checksum did NOT match")
            quarantine((qdir_md5 / controller), logger, tb, tbmd5)
            nquarantined += 1
            continue

        if dataset:
            try:
                dataset.md5 = archive_md5_hex_value
                dataset.update()
            except DatasetError as e:
                logger.warn(
                    "Unable to update dataset {} with md5: {}", str(dataset), str(e)
                )

        # make the destination directory and its TODO subdir if necessary.
        try:
            os.makedirs(dest / "TODO")
        except FileExistsError:
            # directory already exists, ignore
            pass
        except Exception:
            logger.error("{}: Error in creating TODO directory.", config.TS)
            quarantine(os.path.join(errors, controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # First, copy the small .md5 file to the destination. That way, if
        # that operation fails it will fail quickly since the file is small.
        try:
            shutil.copy2(tbmd5, dest)
        except Exception:
            logger.error(
                "{}: Error in copying .md5 file to Destination path.", config.TS
            )
            try:
                os.remove(dest / tbmd5.name)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of copy failure failed itself.", config.TS
                )
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Next, mv the "large" tar ball to the destination. If the destination
        # is on the same device, the move should be quick. If the destination
        # is on a different device, the move will be a copy and delete, and
        # will take a bit longer. If it fails, the file will NOT be at the
        # destination.
        try:
            shutil.move(str(tb), str(dest))
        except Exception:
            logger.error(
                "{}: Error in moving tarball file to Destination path.", config.TS
            )
            try:
                os.remove(dest / resultname)
            except FileNotFoundError:
                logger.error(
                    "{}: Warning: cleanup of copy failure failed itself.", config.TS
                )
            quarantine((errors / controller), logger, tb, tbmd5)
            nerrs += 1
            continue

        # Restore the SELinux context properly
        try:
            selinux.restorecon(dest / tb.name)
            selinux.restorecon(dest / tbmd5.name)
        except Exception as e:
            # log it but do not abort
            logger.error("{}: Error: 'restorecon {}', {}", config.TS, dest / tb.name, e)

        # Now that we have successfully moved the tar ball and its .md5 to the
        # destination, we can remove the original .md5 file.
        try:
            os.remove(tbmd5)
        except Exception as exc:
            logger.error(
                "{}: Warning: cleanup of successful copy operation failed: '{}'",
                config.TS,
                exc,
            )

        try:
            os.symlink((dest / resultname), (dest / "TODO" / resultname))
        except Exception as exc:
            logger.error("{}: Error in creation of symlink. '{}'", config.TS, exc)
            # if we fail to make the link, we quarantine the (already moved)
            # tarball and .md5.
            quarantine(
                (errors / controller), logger, (dest / tb.name), (dest / tbmd5.name),
            )
            nerrs += 1
            continue

        ntbs += 1

        try:
            if dataset:
                dataset.advance(States.UPLOADED)
        except Exception:
            logger.exception("Unable to finalize {}", dataset)

        nstatus = f"{nstatus}{config.TS}: processed {tb}\n"
        logger.info(f"{tb.name}: OK")

    return Results(
        nstatus=nstatus,
        ntotal=ntotal,
        ntbs=ntbs,
        nquarantined=nquarantined,
        ndups=ndups,
        nerrs=nerrs,
    )
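# Illustrative sketch only (assumption): the real md5_check used by
# process_tb() above is defined elsewhere in this module. Inferred from the
# caller, the intent is to return (computed, expected) MD5 hex digests for
# the tarball and its companion ".md5" file, with None for any value that
# cannot be obtained; the caller quarantines the tarball when they differ or
# are missing. Assumes "import hashlib" and "from pathlib import Path".
def md5_check_sketch(tb: Path, tbmd5: Path, logger):
    computed = expected = None
    try:
        # The .md5 file follows md5sum(1) format: "<hexdigest>  <filename>"
        expected = tbmd5.read_text().split()[0]
    except Exception:
        logger.exception("Unable to read MD5 file {}", tbmd5)
    try:
        # Hash the tarball in chunks to avoid loading it all into memory
        h = hashlib.md5()
        with tb.open("rb") as f:
            for chunk in iter(lambda: f.read(2 ** 20), b""):
                h.update(chunk)
        computed = h.hexdigest()
    except Exception:
        logger.exception("Unable to hash tarball {}", tb)
    return computed, expected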