Esempio n. 1
0
 def test_md5sum():
     """Check md5sum() against the checksum recorded next to the fixture.

     The expected value is the first whitespace-separated field of the
     fixture's companion ``.md5`` file.
     """
     # Reuse the existing file from the server upload fixture.
     filename = "log.tar.xz"
     test_file = Path("./lib/pbench/test/unit/server/fixtures/upload/",
                      filename)
     # Read through a context manager so the file handle is closed
     # deterministically rather than whenever the object is collected.
     with open(f"{test_file}.md5", "r") as md5_fp:
         expected_hash_md5 = md5_fp.read().split()[0]
     hash_md5 = md5sum(test_file)
     assert (hash_md5 == expected_hash_md5
             ), f"Expected MD5 '{expected_hash_md5}', got '{hash_md5}'"
Esempio n. 2
0
    def copy_result_tb(self, token: str) -> None:
        """copy_result_tb - copies tb from agent to configured server upload URL

        The PUT request is prepared first so that its headers can be
        verified before anything is sent to the server.

            Args
                token -- a token which establishes that the caller is
                    authorized to make the PUT request on behalf of a
                    specific user.

            Raises
                RuntimeError -- a connection to the upload URL could not be
                    established.
                FileUploadError -- any other failure while preparing,
                    checking, or sending the request, including an HTTP
                    error status reported by the server.
        """
        content_length, content_md5 = md5sum(str(self.tarball))
        headers = {
            "Content-MD5": content_md5,
            "Authorization": f"Bearer {token}",
            "controller": self.controller,
        }
        with self.tarball.open("rb") as f:
            try:
                request = requests.Request("PUT",
                                           self.upload_url,
                                           data=f,
                                           headers=headers).prepare()

                # Per RFC 2616, a request must not contain both
                # Content-Length and Transfer-Encoding headers; however,
                # the server would like to receive the Content-Length
                # header, but the requests package may opt to generate
                # the Transfer-Encoding header instead...so, check that
                # we got what we want before we send the request.  Also,
                # confirm that the contents of the Content-Length header
                # is what we expect.
                assert (
                    "Transfer-Encoding" not in request.headers
                ), "Upload request unexpectedly contains a `Transfer-Encoding` header"
                assert (
                    "Content-Length" in request.headers
                ), "Upload request unexpectedly missing a `Content-Length` header"
                assert request.headers["Content-Length"] == str(
                    content_length
                ), ("Upload request `Content-Length` header contains {} -- "
                    "expected {}".format(request.headers["Content-Length"],
                                         content_length))

                # Use the session as a context manager so that it is closed
                # (and its pooled connections released) once the request
                # has been sent, instead of leaking it.
                with requests.Session() as session:
                    response = session.send(request)
                response.raise_for_status()
                self.logger.info("File uploaded successfully")
            except requests.exceptions.ConnectionError as exc:
                raise RuntimeError(
                    f"Cannot connect to '{self.upload_url}'") from exc
            except Exception as exc:
                # Everything else, including a failed header check above or
                # an HTTP error status, is reported as an upload error.
                raise FileUploadError(
                    "There was something wrong with file upload request:  "
                    f"file: '{self.tarball}', URL: '{self.upload_url}', ({exc})"
                ) from exc
        assert (
            response.ok
        ), f"Logic bomb!  Unexpected error response, '{response.reason}' ({response.status_code})"
Esempio n. 3
0
def md5_check(tb, tbmd5, logger):
    """Return the recorded and the computed MD5 values for a tar ball.

    Args:
        tb: the tar ball, handed to md5sum().
        tbmd5: the tar ball's MD5 file; must provide an ``open()`` method.
        logger: object whose ``exception()`` method receives failure reports.

    Returns:
        A 2-tuple ``(value from the MD5 file, value from md5sum(tb))``.  Each
        element is None when the corresponding step raised an exception;
        the failure is reported through ``logger.exception()``.
    """
    # The recorded value is the text before the first blank on the first
    # line of the MD5 file.
    try:
        with tbmd5.open() as md5_fp:
            first_line = md5_fp.readline()
            recorded = first_line.split(" ")[0]
    except Exception:
        logger.exception("Quarantine: Could not read {}", tbmd5)
        recorded = None

    # Whatever md5sum() returns for the tar ball is passed back unchanged.
    try:
        computed = md5sum(tb)
    except Exception:
        logger.exception("Quarantine: Could not read {}", tb)
        computed = None

    return recorded, computed
Esempio n. 4
0
 def make_md5sum(self, tarball):
     """Write the ``<tarball>.md5`` companion file for a tar ball.

     The file holds a single line: the md5sum() result, a blank, and the
     tar ball's file name.  If computing or writing the checksum raises,
     the failure is logged, both the tar ball and the MD5 file are removed
     (a file that is already missing is not reported), and the process
     exits with status 1.
     """
     md5_path = Path(f"{tarball}.md5")
     try:
         digest = md5sum(tarball)
         md5_path.write_text(f"{digest} {tarball.name}\n")
     except Exception:
         self.logger.error("md5sum failed for {}, skipping", tarball)
         # Remove the tar ball first, then its MD5 file; only removal
         # errors other than "no such file" are logged.
         leftovers = (
             (tarball, "error removing failed tar ball, {}"),
             (md5_path, "error removing failed tar ball MD5, {}"),
         )
         for victim, complaint in leftovers:
             try:
                 victim.unlink()
             except OSError as exc:
                 if exc.errno != errno.ENOENT:
                     self.logger.error(complaint, victim)
         sys.exit(1)
    def checkmd5(self):
        """Check the integrity of results in a local directory.

        The directory is either the archive or the local backup.  For each
        entry of ``self.content_list`` the file of that name under
        ``self.dirname`` is checksummed with md5sum() and compared with the
        entry's ``md5`` attribute.  Matching entries are listed in the
        ``.ok`` indicator file and mismatches in the ``.fail`` indicator
        file; both files are created under ``self.tmpdir``.

        Returns the number of entries which failed the MD5 check (also kept
        in ``self.nfailed_md5``); exceptions from opening the indicator
        files or from md5sum() propagate to the caller.
        """
        self.indicator_file = Path(self.tmpdir, f"list.{self.name}")
        self.indicator_file_ok = f"{self.indicator_file}.ok"
        self.indicator_file_fail = f"{self.indicator_file}.fail"
        self.nfailed_md5 = 0

        with open(self.indicator_file_ok, "w") as passed_fp, \
                open(self.indicator_file_fail, "w") as failed_fp:
            for entry in self.content_list:
                # md5sum() yields a pair here; only the second member, the
                # MD5 value, is of interest.
                _unused, actual_md5 = md5sum(Path(self.dirname, entry.name))
                if entry.md5 == actual_md5:
                    passed_fp.write(f"{entry.name}: OK\n")
                    continue
                self.nfailed_md5 += 1
                failed_fp.write(f"{entry.name}: FAILED\n")

        return self.nfailed_md5
Esempio n. 6
0
def backup_data(lb_obj, s3_obj, config, logger):
    """Back up every tar ball linked under the archive's `_linksrc` directories.

    Each ``*.tar.xz`` link found under ``config.ARCHIVE/*/<_linksrc>`` is
    resolved and validated (tar ball and ``.md5`` file must be regular
    files, and the recorded MD5 must match the computed one).  Links which
    fail validation are quarantined into ``config.QDIR`` and skipped.
    Valid tar balls are handed to backup_to_local() and backup_to_s3();
    when the required backups succeed the link is moved to `_linkdest`.

    Args:
        lb_obj: local backup object, passed through to backup_to_local().
        s3_obj: S3 backup object, passed through to backup_to_s3(); when it
            is None the S3 result is not required for the link to be moved.
        config: configuration object providing ``QDIR`` and ``ARCHIVE``.
        logger: logger used for all reporting.

    Returns:
        A Results object carrying the total, local success/failure,
        S3 success/failure, and quarantine counts.
    """
    qdir = config.QDIR

    tarlist = glob.iglob(
        os.path.join(config.ARCHIVE, "*", _linksrc, "*.tar.xz"))
    ntotal = nbackup_success = nbackup_fail = ns3_success = ns3_fail = nquaran = 0

    for tb in sorted(tarlist):
        ntotal += 1
        # resolve the link
        try:
            tar = Path(tb).resolve(strict=True)
        except FileNotFoundError:
            # The link has no target, so there is nothing to back up.  We
            # must not fall through: `tar` would either be unbound (first
            # iteration) or still refer to the previous tar ball, which
            # would then be processed a second time under this link's name.
            # Treat it like the other "tar ball does not exist" failures.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Tarball link, '{}', does not resolve to a real location", tb)
            continue

        logger.debug("Start backup of {}.", tar)
        # check tarball exist and it is a regular file
        if not (tar.exists() and tar.is_file()):
            # tarball does not exist or it is not a regular file
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file",
                tb,
                tar,
            )
            continue

        archive_md5 = Path(f"{tar}.md5")
        # check that the md5 file exists and it is a regular file
        if not (archive_md5.exists() and archive_md5.is_file()):
            # md5 file does not exist or it is not a regular file
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, {} does not exist or it is not a regular file",
                tb,
                archive_md5,
            )
            continue

        # read the md5sum from md5 file
        try:
            with archive_md5.open() as f:
                archive_md5_hex_value = f.readline().split(" ")[0]
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}", tb,
                             archive_md5)
            continue

        # match md5sum of the tarball to its md5 file
        try:
            (_, archive_tar_hex_value) = md5sum(tar)
        except Exception:
            # Could not read file.
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.exception("Quarantine: {}, Could not read {}", tb, tar)
            continue

        if archive_tar_hex_value != archive_md5_hex_value:
            quarantine(qdir, logger, tb)
            nquaran += 1
            logger.error(
                "Quarantine: {}, md5sum of {} does not match with its md5 file {}",
                tb,
                tar,
                archive_md5,
            )
            continue

        resultname = tar.name
        controller_path = tar.parent
        controller = controller_path.name
        try:
            # This tool can't see a dataset until it's been prepared either
            # by server PUT or by pbench-server-prep-shim-002.py; in either
            # case, the Dataset object must already exist.
            dataset = Dataset.attach(controller=controller, path=resultname)
        except DatasetError as e:
            # A tracking problem is not fatal: the backup proceeds, but no
            # metadata is recorded for this tar ball below.
            logger.warning("Trouble tracking {}:{}: {}", controller,
                           resultname, str(e))
            dataset = None

        # This will handle all the local backup related
        # operations and count the number of successes and failures.
        local_backup_result = backup_to_local(
            lb_obj,
            logger,
            controller_path,
            controller,
            tb,
            tar,
            resultname,
            archive_md5,
            archive_md5_hex_value,
        )

        if local_backup_result == Status.SUCCESS:
            nbackup_success += 1
        elif local_backup_result == Status.FAIL:
            nbackup_fail += 1
        else:
            assert (
                False
            ), f"Impossible situation, local_backup_result = {local_backup_result!r}"

        # This will handle all the S3 bucket related operations
        # and count the number of successes and failures.
        s3_backup_result = backup_to_s3(
            s3_obj,
            logger,
            controller_path,
            controller,
            tb,
            tar,
            resultname,
            archive_md5_hex_value,
        )

        if s3_backup_result == Status.SUCCESS:
            ns3_success += 1
        elif s3_backup_result == Status.FAIL:
            ns3_fail += 1
        else:
            assert (
                False
            ), f"Impossible situation, s3_backup_result = {s3_backup_result!r}"

        if local_backup_result == Status.SUCCESS and (
                s3_obj is None or s3_backup_result == Status.SUCCESS):
            # Move tar ball symlink to its final resting place
            rename_tb_link(tb, Path(controller_path, _linkdest), logger)
        else:
            # Do nothing when the backup fails, allowing us to retry on a
            # future pass.
            pass

        # NOTE(review): the ARCHIVED metadata is recorded even when one of
        # the backups above failed and the link was left in place for a
        # retry -- confirm that this is intended.
        if dataset:
            Metadata.create(dataset=dataset,
                            key=Metadata.ARCHIVED,
                            value="True")
        logger.debug("End backup of {}.", tar)

    return Results(
        ntotal=ntotal,
        nbackup_success=nbackup_success,
        nbackup_fail=nbackup_fail,
        ns3_success=ns3_success,
        ns3_fail=ns3_fail,
        nquaran=nquaran,
    )
Esempio n. 7
0
    def send_tools(self, data):
        """send_tools - send any collected tool data to the tool data sink.

        The 'action' and 'group' values of the payload have already been
        validated before this "send tools" action is invoked.

        This method only proceeds if the 'directory' entry value of the
        payload matches what was previously provided to a "start tools"
        action.

        The tool directory is packed into a ``<hostname>.tar.xz`` tar ball
        next to it, and the tar ball is PUT to the controller's tool data
        URL.  On a successful PUT the tool data hierarchy is removed; the
        tar ball itself is always removed.  A client status of "success" or
        "failures sending tool data" is sent before returning.

        Args:
            data: the action payload; its "directory" entry is used.

        Returns:
            The number of failures encountered (0 on success), or False
            when the directory was never seen by a "start tools" action.
        """

        if len(set(self._tools.keys()) - set(self.persist_tools)) == 0:
            # Every registered tool is a persistent tool: nothing to send.
            self._send_client_status("success")
            return 0

        directory = data["directory"]
        try:
            tool_dir = self.directories[directory]
        except KeyError:
            self.logger.error(
                "INTERNAL ERROR - send tools action encountered for a"
                " directory, '%s', that is different from any previous"
                " start tools directory, %r",
                directory,
                self.directories.keys(),
            )
            self._send_client_status("internal-error")
            # NOTE(review): False compares equal to 0, the "no failures"
            # value returned elsewhere -- confirm callers expect this.
            return False

        if self._hostname == self._controller:
            del self.directories[directory]
            self.logger.info(
                "%s: send_tools (no-op) %s %s", self._hostname, self._group, tool_dir
            )
            # Note that we don't have a directory to send when a Tool
            # Meister runs on the same host as the controller.
            self._send_client_status("success")
            return 0

        assert tool_dir.name == self._hostname, (
            f"Logic Bomb! Final path component of the tool directory is"
            f" '{tool_dir.name}', not our host name '{self._hostname}'"
        )

        failures = 0
        parent_dir = tool_dir.parent
        tar_file = parent_dir / f"{self._hostname}.tar.xz"
        o_file = parent_dir / f"{self._hostname}.tar.out"
        e_file = parent_dir / f"{self._hostname}.tar.err"
        try:
            # Invoke tar directly for efficiency.
            with o_file.open("w") as ofp, e_file.open("w") as efp:
                cp = subprocess.run(
                    [tar_path, "-Jcf", tar_file, self._hostname],
                    cwd=parent_dir,
                    stdin=None,
                    stdout=ofp,
                    stderr=efp,
                )
        except Exception:
            self.logger.exception("Failed to create tools tar ball '%s'", tar_file)
            # Nothing was sent, so this must not be reported as a success.
            failures += 1
        else:
            try:
                if cp.returncode != 0:
                    self.logger.error(
                        "Failed to create tools tar ball; return code: %d",
                        cp.returncode,
                    )
                    failures += 1
                else:
                    try:
                        tar_md5 = md5sum(tar_file)
                    except Exception:
                        self.logger.exception(
                            "Failed to read tools tar ball, '%s'", tar_file
                        )
                        failures += 1
                    else:
                        # The tar command's captured output is only kept
                        # when the tar ball could not be created or read.
                        try:
                            o_file.unlink()
                        except Exception as exc:
                            self.logger.warning(
                                "Failure removing tar command output file, %s: %s",
                                o_file,
                                exc,
                            )
                        try:
                            e_file.unlink()
                        except Exception as exc:
                            self.logger.warning(
                                "Failure removing tar command output file, %s: %s",
                                e_file,
                                exc,
                            )

                        self.logger.debug(
                            "%s: starting send_tools %s %s",
                            self._hostname,
                            self._group,
                            self._directory,
                        )
                        headers = {"md5sum": tar_md5}
                        # The URL path component is the MD5 hex digest of
                        # the UTF-8 encoded directory name.
                        directory_bytes = data["directory"].encode("utf-8")
                        tool_data_ctx = hashlib.md5(directory_bytes).hexdigest()
                        url = (
                            f"http://{self._controller}:8080/tool-data"
                            f"/{tool_data_ctx}/{self._hostname}"
                        )
                        sent = False
                        retries = 200
                        while not sent:
                            try:
                                with tar_file.open("rb") as tar_fp:
                                    response = requests.put(
                                        url, headers=headers, data=tar_fp
                                    )
                            except (
                                ConnectionRefusedError,
                                requests.exceptions.ConnectionError,
                            ) as exc:
                                self.logger.debug("%s", exc)
                                # Try until we get a connection.
                                time.sleep(0.1)
                                retries -= 1
                                if retries <= 0:
                                    # Out of retries: hand the error to the
                                    # outer handler below.
                                    raise
                            else:
                                sent = True
                                if response.status_code != 200:
                                    self.logger.error(
                                        "PUT '%s' failed with '%d', '%s'",
                                        url,
                                        response.status_code,
                                        response.text,
                                    )
                                    failures += 1
                                else:
                                    self.logger.debug(
                                        "PUT '%s' succeeded ('%d', '%s')",
                                        url,
                                        response.status_code,
                                        response.text,
                                    )
                                    try:
                                        shutil.rmtree(parent_dir)
                                    except Exception:
                                        self.logger.exception(
                                            "Failed to remove tool data"
                                            " hierarchy, '%s'",
                                            parent_dir,
                                        )
                                        failures += 1
                                    else:
                                        del self.directories[directory]
                        self.logger.info(
                            "%s: send_tools completed %s %s",
                            self._hostname,
                            self._group,
                            tool_dir,
                        )
            except Exception:
                self.logger.exception("Unexpected error encountered")
                # This includes exhausting the connection retries above; the
                # tool data did not make it, so count it as a failure.
                failures += 1
        finally:
            # We always remove the created tar file regardless of success or
            # failure. The above code should take care of removing the
            # directory tree the tar file was created from when it was
            # successfully transferred, but if the transfer failed, we'll
            # still have the local directory, so the tar file is still
            # deletable.
            try:
                tar_file.unlink()
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    self.logger.warning(
                        "error removing tools tar ball, '%s': %s", tar_file, exc
                    )
            except Exception as exc:
                self.logger.warning(
                    "unexpected error removing tools tar ball, '%s': %s", tar_file, exc
                )

        self._send_client_status(
            "success" if failures == 0 else "failures sending tool data"
        )
        return failures