Exemple #1
0
    def del_empty_dirs(self):
        """ Delete any empty directories from the subtree.

        This is done in case of a backup operation with a time window so that
        we delete any empty directories from the subtree. For this we need to
        walk the directory structure from the deepest one to the root.

        Raises:
            IOError: Could not delete directory.
        """
        tags = ['container', 'files']
        # Visit directories deepest-first: collect the traversal order once
        # and reverse it (avoids the O(n^2) insert(0, ...) pattern).
        lst_dirs = list(self.dirs())
        lst_dirs.reverse()

        for dentry in lst_dirs:
            __, dst = self.get_endpoints(dentry[1])
            url = client.URL(dst.encode("utf-8"))
            # Get a list which contains:
            # ['relative_path', 'd', number_containers, number_files]
            info = get_entry_info(url, dentry[1], tags, is_dir=True)
            self.logger.info("Info is: {0}".format(info))

            # Remove the directory only if it holds no containers and no files
            if int(info[2]) == 0 and int(info[3]) == 0:
                fs = self.get_fs(dst)
                st_rm, __ = fs.rmdir(
                    (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

                if not st_rm.ok:
                    err_msg = "Error removing entry={0}".format(dst)
                    self.logger.error(err_msg)
                    # Propagate the message so callers can log/report it
                    raise IOError(err_msg)
Exemple #2
0
    def evict_disk_cache(self):
        """ Send prepare evict requests to the CTA so that the files are
        removed from the disk cache of the tape system.

        Requests are sent in batches of at most 100 paths; a final partial
        batch is flushed after the loop. Failures are logged as warnings but
        do not abort the operation (best-effort eviction).
        """
        batch_size = 100
        timeout = 10
        batch = []
        # @todo(esindril) use the XRootD provided flag once this is
        # available in the Python interface
        xrd_prepare_evict_flag = 0x000100000000

        def _send_evict(fs, paths):
            """ Send one prepare evict request for the given batch of paths """
            prep_stat, __ = fs.prepare(paths, xrd_prepare_evict_flag, 0,
                                       timeout)

            if not prep_stat.ok:
                self.logger.warning("Failed prepare evict for batch")

        dst = None

        for fentry in self.archive.files():
            __, dst = self.archive.get_endpoints(fentry[1])
            url = client.URL(dst.encode("utf-8"))
            batch.append(url.path.encode("utf-8"))

            if len(batch) == batch_size:
                _send_evict(self.archive.get_fs(dst), batch)
                batch = []

        # Flush the remaining entries that did not fill a complete batch
        if batch:
            _send_evict(self.archive.get_fs(dst), batch)
            batch = []

        self.logger.info("Finished sending all the prepare evict requests")
Exemple #3
0
def test_exec_cmd():
    """Check the exec command.

    List directory extended attributes from EOS local instance.
    """
    url = client.URL(''.join([SERVER_URL, EOS_DIR]))
    cmd_parts = [url.protocol, "://", url.hostid, "//proc/user/",
                 "?mgm.cmd=attr&mgm.subcmd=ls&mgm.path=", EOS_DIR]
    status, __, __ = exec_cmd(''.join(cmd_parts))
    assert status
Exemple #4
0
    def archive_prepare(self):
        """ Prepare requested archive operation.

        Renames the archive file in EOS (marking it in-progress with the
        ".err" extension), copies it to local disk and builds the ArchiveFile
        object used by the transfer.

        Raises:
            IOError: Failed to rename or transfer archive file.
        """
        # Rename archive file in EOS
        efile_url = client.URL(self.efile_full.encode("utf-8"))
        eosf_rename = ''.join(
            [self.efile_root, self.config.ARCH_FN, ".", self.oper, ".err"])
        rename_url = client.URL(eosf_rename.encode("utf-8"))
        frename = ''.join([
            rename_url.protocol, "://", rename_url.hostid,
            "//proc/user/?mgm.cmd=file&mgm.subcmd=rename"
            "&mgm.path=", efile_url.path, "&mgm.file.source=", efile_url.path,
            "&mgm.file.target=", rename_url.path
        ])
        (status, __, stderr) = exec_cmd(frename)

        if not status:
            err_msg = ("Failed to rename archive file {0} to {1}, msg={2}"
                       "").format(self.efile_full, rename_url, stderr)
            self.logger.error(err_msg)
            raise IOError(err_msg)

        # Copy archive file from EOS to the local disk. Encode both copy
        # arguments as utf-8 for consistency with backup_prepare and the
        # rest of the XRootD calls in this module.
        self.efile_full = eosf_rename
        eos_fs = client.FileSystem(self.efile_full.encode("utf-8"))
        st, _ = eos_fs.copy(
            (self.efile_full + "?eos.ruid=0&eos.rgid=0").encode("utf-8"),
            self.tx_file.encode("utf-8"), True)

        if not st.ok:
            err_msg = ("Failed to copy archive file={0} to local disk at={1}"
                       "").format(self.efile_full, self.tx_file)
            self.logger.error(err_msg)
            raise IOError(err_msg)

        # Create the ArchiveFile object; d2t is True only for PUT operations
        d2t = (self.oper == self.config.PUT_OP)
        self.archive = ArchiveFile(self.tx_file, d2t)
Exemple #5
0
    def do_backup(self):
        """ Perform a backup operation using the provided backup file.

        Steps: chown the backup file to root, create all directories, copy
        the files and set their metadata, verify the transfer, optionally
        delete empty directories (time-window backups) and clean up.

        Raises:
            IOError: Failed setting ownership of the backup file.
        """
        t0 = time.time()
        indx_dir = 0

        # Root owns the .sys.b#.backup.file. Encode as utf-8 for consistency
        # with the other client.FileSystem/URL calls in this module.
        fs = client.FileSystem(self.efile_full.encode("utf-8"))
        efile_url = client.URL(self.efile_full.encode("utf-8"))
        arg = ''.join([
            efile_url.path, "?eos.ruid=0&eos.rgid=0&mgm.pcmd=chown&uid=0&gid=0"
        ])
        xrd_st, __ = fs.query(QueryCode.OPAQUEFILE, arg.encode("utf-8"))

        if not xrd_st.ok:
            err_msg = "Failed setting ownership of the backup file: {0}".format(
                self.efile_full)
            self.logger.error(err_msg)
            raise IOError(err_msg)

        # Create directories
        for dentry in self.archive.dirs():
            # Do special checks for root directory
            if dentry[1] == "./":
                self.archive.check_root_dir()

            indx_dir += 1
            self.archive.mkdir(dentry)
            msg = "create dir {0}/{1}".format(indx_dir,
                                              self.archive.header['num_dirs'])
            self.set_status(msg)

        # Copy files and set metadata information
        self.copy_files()
        self.update_file_access()

        self.set_status("verifying")
        check_ok, lst_failed = self.archive.verify(True)
        self.backup_write_status(lst_failed, check_ok)

        # Delete empty dirs if this was a backup with a time window
        if self.archive.header['twindow_type'] and self.archive.header[
                'twindow_val']:
            self.archive.del_empty_dirs()

        self.set_status("cleaning")
        self.logger.info("TIMING_transfer={0} sec".format(time.time() - t0))
        self.backup_tx_clean()
Exemple #6
0
    def mkdir(self, dentry):
        """ Create directory and optionally for GET operations set the
        metadata information.

        Args:
            dentry (list): Directory entry as read from the archive file.

        Raises:
            IOError: Directory creation failed.
        """
        __, surl = self.get_endpoints(dentry[1])
        fs = self.get_fs(surl)
        url = client.URL(surl.encode("utf-8"))

        # Create directory if not already existing
        st, __ = fs.stat((url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

        if not st.ok:
            if not self.d2t:
                # GET: act as root on the disk side
                st, __ = fs.mkdir(
                    (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))
            else:
                st, __ = fs.mkdir((url.path).encode("utf-8"))

            if not st.ok:
                err_msg = (
                    "Dir={0} failed mkdir errmsg={1}, errno={2}, code={3}"
                    "").format(surl, st.message.decode("utf-8"), st.errno,
                               st.code)
                self.logger.error(err_msg)
                raise IOError(err_msg)

        # For GET operations set also the metadata
        if not self.d2t:
            dict_dinfo = dict(zip(self.header['dir_meta'], dentry[2:]))

            # Excluded extended attributes may be absent from older archive
            # headers - default to an empty list instead of try/except
            excl_xattr = self.header.get('excl_xattr', [])

            try:
                set_dir_info(surl, dict_dinfo, excl_xattr)
            except IOError:
                err_msg = "Dir={0} failed setting metadata".format(surl)
                self.logger.error(err_msg)
                raise IOError(err_msg)
Exemple #7
0
    def backup_prepare(self):
        """ Prepare requested backup operation.

        Copies the backup file from EOS to local disk, builds the
        ArchiveFile object and - unless forced - checks that the destination
        directory exists and has mode 777.

        Raises:
            IOError: Failed to transfer backup file or destination checks
                failed.
        """
        # Copy backup file from EOS to the local disk
        self.logger.info(("Prepare backup copy from {0} to {1}"
                          "").format(self.efile_full, self.tx_file))
        eos_fs = client.FileSystem(self.efile_full.encode("utf-8"))
        st, _ = eos_fs.copy(
            (self.efile_full + "?eos.ruid=0&eos.rgid=0").encode("utf-8"),
            self.tx_file.encode("utf-8"), True)

        if not st.ok:
            err_msg = (
                "Failed to copy backup file={0} to local disk at={1} err_msg={2}"
                "").format(self.efile_full, self.tx_file, st.message)
            self.logger.error(err_msg)
            raise IOError(err_msg)

        # Create the ArchiveFile object for the backup which is similar to a
        # tape to disk transfer
        self.archive = ArchiveFile(self.tx_file, False)

        # Check that the destination directory exists and has mode 777, if
        # forced then skip checks
        if not self.force:
            surl = self.archive.header['dst']
            url = client.URL(surl.encode("utf-8"))
            fs = self.archive.get_fs(surl)
            # BUGFIX: original built a tuple with a unary '+' on the opaque
            # string - concatenate the path and the opaque info instead
            st_stat, resp_stat = fs.stat(
                (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

            # BUGFIX: original raised when the stat SUCCEEDED; the error
            # path is a failed stat of the destination
            if not st_stat.ok:
                err_msg = ("Failed to stat backup destination url={0}"
                           "").format(surl)
                self.logger.error(err_msg)
                raise IOError(err_msg)

            if resp_stat.flags != (client.StatInfoFlags.IS_READABLE
                                   | client.StatInfoFlags.IS_WRITABLE):
                err_msg = ("Backup destination url={0} must have mode 777"
                           ).format(surl)
                self.logger.error(err_msg)
                raise IOError(err_msg)
Exemple #8
0
    def wait_on_tape(self):
        """ Check and wait that all the files are on tape, which in our case
        means checking the "m" bit. If file is not on tape then suspend the
        current thread for a period depending on the index of the failed
        file (earlier failures sleep longer).
        """
        # NOTE(review): docstring mentions 1 to 10 minutes but these values
        # yield a constant 5 second sleep (max < min) - confirm intent
        min_timeout, max_timeout = 5, 1

        while True:
            indx = 0  # index of the first file not on tape
            all_on_tape = True

            for fentry in self.archive.files():
                indx += 1
                __, dst = self.archive.get_endpoints(fentry[1])
                url = client.URL(dst.encode("utf-8"))
                st_stat, resp_stat = self.archive.fs_dst.stat(
                    url.path.encode("utf-8"))

                if not st_stat.ok:
                    err_msg = "Error stat entry={0}".format(dst)
                    # BUGFIX: logging.Logger has no 'err' method
                    self.logger.error(err_msg)
                    raise IOError(err_msg)

                # Check file is on tape; zero-size files carry no tape copy
                if resp_stat.size != 0 and not (resp_stat.flags
                                                & StatInfoFlags.BACKUP_EXISTS):
                    self.logger.debug(
                        "File {0} is not yet on tape".format(dst))
                    all_on_tape = False
                    break

            if all_on_tape:
                break
            else:
                # Set timeout value proportional to how far we got through
                # the file list before finding one not yet on tape
                ratio = indx / int(self.archive.header['num_files'])
                timeout = int(max_timeout * (1 - ratio))

                if timeout < min_timeout:
                    timeout = min_timeout

                self.logger.info(
                    "Going to sleep for {0} seconds".format(timeout))
                sleep(timeout)
Exemple #9
0
    def del_entry(self, rel_path, is_dir, tape_delete):
        """ Delete file/dir. For directories it is successful only if the dir
        is empty. For deleting the subtree rooted in a directory one needs to
        use the del_subtree method.

        Args:
            rel_path (str): Entry relative path as stored in the archive file.
            is_dir (bool): True is entry is dir, otherwise False.
            tape_delete(bool): If tape_delete is None the delete comes from a
                PUT or GET operations so we only use the value of self.d2t to
                decide which entry we will delete. If tape_delete is True we
                delete tape data, otherwise we purge (delete from disk only).

        Raises:
            IOError: Deletion could not be performed.
        """
        src, dst = self.get_endpoints(rel_path)

        if tape_delete is None:
            surl = dst  # self.d2t is already used inside get_endpoints
        else:
            surl = src if tape_delete else dst

        url = client.URL(surl.encode("utf-8"))
        fs = self.get_fs(surl)
        self.logger.debug("Delete entry={0}".format(surl))

        if is_dir:
            st_rm, __ = fs.rmdir(
                (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))
        else:
            st_rm, __ = fs.rm(
                (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

        if not st_rm.ok:
            # Check if entry exists - a failed remove of an already-absent
            # entry is only a warning, not an error
            st_stat, __ = fs.stat(url.path.encode("utf-8"))

            if st_stat.ok:
                err_msg = "Error removing entry={0}".format(surl)
                self.logger.error(err_msg)
                # Propagate the message so callers can log/report it
                raise IOError(err_msg)

            self.logger.warning("Entry={0} already removed".format(surl))
Exemple #10
0
    def _verify_entry(self, entry, tx_check_only):
        """ Check that the entry (file/dir) has the proper meta data.

        Args:
            entry (list): Entry from the archive file containing all info about
               this particular file/directory.
            tx_check_only (boolean): If True then for files only check their
                existence, size and checksum values.

        Raises:
            CheckEntryException: if entry verification fails.
        """
        self.logger.debug("Verify entry={0}".format(entry))
        is_dir, path = (entry[0] == 'd'), entry[1]
        __, dst = self.get_endpoints(path)
        url = client.URL(dst.encode("utf-8"))

        if self.d2t:  # for PUT check entry size and checksum if possible
            fs = self.get_fs(dst)
            st, stat_info = fs.stat(url.path.encode("utf-8"))

            if not st.ok:
                err_msg = "Entry={0} failed stat".format(dst)
                self.logger.error(err_msg)
                raise CheckEntryException("failed stat")

            if not is_dir:  # check file size match
                indx = self.header["file_meta"].index("size") + 2
                orig_size = int(entry[indx])

                if stat_info.size != orig_size:
                    err_msg = ("Verify entry={0}, expect_size={1}, size={2}"
                               "").format(dst, orig_size, stat_info.size)
                    self.logger.error(err_msg)
                    raise CheckEntryException("failed file size match")

                # Check checksum only if it is adler32 - only one supported by CASTOR
                indx = self.header["file_meta"].index("xstype") + 2

                # !!!HACK!!! Check the checksum only if file size is not 0 since
                # CASTOR does not store any checksum for 0 size files
                if stat_info.size != 0 and entry[indx] == "adler":
                    indx = self.header["file_meta"].index("xs") + 2
                    xs = entry[indx]
                    st, xs_resp = fs.query(QueryCode.CHECKSUM, url.path)

                    if not st.ok:
                        err_msg = "Entry={0} failed xs query".format(dst)
                        self.logger.error(err_msg)
                        raise CheckEntryException("failed xs query")

                    # Result has an annoying \x00 character at the end and it
                    # contains the xs type (adler32) and the xs value
                    resp = xs_resp.split('\x00')[0].split()

                    # If checksum value is not 8 char long then we need padding
                    if len(resp[1]) != 8:
                        resp[1] = "{0:0>8}".format(resp[1])

                    if resp[0] == "adler32" and resp[1] != xs:
                        err_msg = (
                            "Entry={0} xs value missmatch xs_expected={1} "
                            "xs_got={2}").format(dst, xs, resp[1])
                        self.logger.error(err_msg)
                        raise CheckEntryException("xs value missmatch")

        else:  # for GET check all metadata
            if is_dir:
                tags = self.header['dir_meta']
            else:
                tags = self.header['file_meta']
                try:
                    if self.header['twindow_type'] and self.header[
                            'twindow_val']:
                        dfile = dict(zip(tags, entry[2:]))
                        twindow_sec = int(self.header['twindow_val'])
                        tentry_sec = int(
                            float(dfile[self.header['twindow_type']]))

                        if tentry_sec < twindow_sec:
                            # No check for this entry
                            return

                    # This is a backup so don't check atomic version files
                    if is_atomic_version_file(entry[1]):
                        return
                except KeyError:
                    # This is not a backup transfer but an archive one, carry on
                    pass

            try:
                meta_info = get_entry_info(url, path, tags, is_dir)
            except (AttributeError, IOError, KeyError):
                self.logger.error(
                    "Failed getting metainfo entry={0}".format(dst))
                raise CheckEntryException("failed getting metainfo")

            # Check if we have any excluded xattrs; older headers may not
            # carry the key at all
            try:
                excl_xattr = self.header['excl_xattr']
            except KeyError:
                excl_xattr = list()

            if is_dir and excl_xattr:
                # For directories and configurations containing excluded xattrs
                # we refine the checks. If "*" in excl_xattr then no check is done.
                if "*" not in excl_xattr:
                    ref_dict = dict(zip(tags, entry[2:]))
                    new_dict = dict(zip(tags, meta_info[2:]))

                    # BUGFIX: dict.iteritems() is Python-2 only; items()
                    # works on both interpreters
                    for key, val in ref_dict.items():
                        if not isinstance(val, dict):
                            if new_dict[key] != val:
                                err_msg = (
                                    "Verify failed for entry={0} expect={1} got={2}"
                                    " at key={3}").format(
                                        dst, entry, meta_info, key)
                                self.logger.error(err_msg)
                                raise CheckEntryException(
                                    "failed metainfo match")
                        else:
                            for kxattr, vxattr in val.items():
                                if kxattr not in excl_xattr:
                                    if vxattr != new_dict[key][kxattr]:
                                        err_msg = (
                                            "Verify failed for entry={0} expect={1} got={2}"
                                            " at xattr key={3}").format(
                                                dst, entry, meta_info, kxattr)
                                        self.logger.error(err_msg)
                                        raise CheckEntryException(
                                            "failed metainfo match")
            else:
                # For files with tx_check_only verification, we refine the checks
                if tx_check_only and not is_dir:
                    idx_size = self.header["file_meta"].index("size") + 2
                    idx_xstype = self.header["file_meta"].index("xstype") + 2
                    idx_xsval = self.header["file_meta"].index("xs") + 2

                    if (meta_info[idx_size] != entry[idx_size]
                            or meta_info[idx_xstype] != entry[idx_xstype]
                            or meta_info[idx_xsval] != entry[idx_xsval]):
                        err_msg = (
                            "Partial verify failed for entry={0} expect={1} got={2}"
                            "").format(dst, entry, meta_info)
                        self.logger.error(err_msg)
                        raise CheckEntryException(
                            "failed metainfo partial match")
                else:
                    if meta_info != entry:
                        err_msg = (
                            "Verify failed for entry={0} expect={1} got={2}"
                            "").format(dst, entry, meta_info)
                        self.logger.error(err_msg)
                        raise CheckEntryException("failed metainfo match")

        self.logger.info("Entry={0}, status={1}".format(dst, True))
Exemple #11
0
    def check_root_dir(self):
        """ Do the necessary checks for the destination directory depending on
        the type of the transfer.

        For PUT (d2t) the destination directory must NOT exist, but the rest
        of the path leading to it is created if missing. For GET the
        destination must exist and contain only the archive file(s).

        Raises:
             IOError: Root dir state inconsistent.
        """
        root_str = self.header['dst' if self.d2t else 'src']
        fs = self.get_fs(root_str)
        url = client.URL(root_str.encode("utf-8"))
        # Stat as root (uid=0/gid=0) to bypass permission restrictions
        arg = url.path + "?eos.ruid=0&eos.rgid=0"
        st, __ = fs.stat(arg.encode("utf-8"))

        if self.d2t:
            if st.ok:
                # For PUT destination dir must NOT exist
                err_msg = "Root PUT dir={0} exists".format(root_str)
                self.logger.error(err_msg)
                raise IOError(err_msg)
            else:
                # Make sure the rest of the path exists as for the moment CASTOR
                # mkdir -p /path/to/file does not work properly
                pos = url.path.find('/', 1)

                # Walk each path prefix ('/a', '/a/b', ...) and create the
                # missing intermediate directories one by one
                while pos != -1:
                    dpath = url.path[:pos]
                    pos = url.path.find('/', pos + 1)
                    st, __ = fs.stat(dpath.encode("utf-8"))

                    if not st.ok:
                        st, __ = fs.mkdir(dpath.encode("utf-8"))

                        if not st.ok:
                            err_msg = ("Dir={0} failed mkdir errmsg={1}"
                                       "").format(dpath,
                                                  st.message.decode("utf-8"))
                            self.logger.error(err_msg)
                            raise IOError(err_msg)

        elif not self.d2t:
            # For GET destination must exist and contain just the archive file
            if not st.ok:
                err_msg = "Root GET dir={0} does NOT exist".format(root_str)
                self.logger.error(err_msg)
                raise IOError(err_msg)
            else:
                # Use the MGM find command with option Z to get the entry
                # counts (nfiles=..., ndirectories=...) under the destination
                ffindcount = ''.join([
                    url.protocol, "://", url.hostid,
                    "//proc/user/?mgm.cmd=find&mgm.path=",
                    seal_path(url.path), "&mgm.option=Z"
                ])
                (status, stdout, stderr) = exec_cmd(ffindcount)

                if status:
                    for entry in stdout.split():
                        tag, num = entry.split('=')

                        # Accept 1 or 2 files (archive file plus optional log)
                        # and exactly the destination directory itself
                        if ((tag == 'nfiles' and num not in ['1', '2'])
                                or (tag == 'ndirectories' and num != '1')):
                            err_msg = (
                                "Root GET dir={0} should contain at least "
                                "one file and at most two - clean up and "
                                "try again").format(root_str)
                            self.logger.error(err_msg)
                            raise IOError(err_msg)
                else:
                    err_msg = ("Error doing find count on GET destination={0}"
                               ", msg={1}").format(root_str, stderr)
                    self.logger.error(err_msg)
                    raise IOError(err_msg)
Exemple #12
0
    def make_mutable(self):
        """ Make the EOS sub-tree pointed by header['src'] mutable.

        For each directory in the archive the 'i' (immutable) flag is removed
        from the 'z:' rule of the sys.acl extended attribute. If the
        resulting acl value is empty the xattr is removed altogether.

        Raises:
            IOError when operation fails.
        """
        url = client.URL(self.header['src'].encode("utf-8"))

        for dentry in self.dirs():
            dir_path = url.path + dentry[1]
            fgetattr = ''.join([
                url.protocol, "://", url.hostid, "//proc/user/",
                "?mgm.cmd=attr&mgm.subcmd=get&mgm.attr.key=sys.acl",
                "&mgm.path=",
                seal_path(dir_path)
            ])
            (status, stdout, __) = exec_cmd(fgetattr)

            if not status:
                warn_msg = "No xattr sys.acl found for dir={0}".format(
                    dir_path)
                self.logger.warning(warn_msg)
            else:
                # Remove the 'z:i' rule from the acl list
                stdout = stdout.replace('"', '')
                acl_val = stdout[stdout.find('=') + 1:]
                rules = acl_val.split(',')
                new_rules = []

                for rule in rules:
                    if rule.startswith("z:"):
                        tag, definition = rule.split(':')
                        pos = definition.find('i')

                        if pos != -1:
                            # Drop the 'i' flag; keep the rule only if other
                            # flags remain in the definition
                            definition = definition[:pos] + definition[pos +
                                                                       1:]

                            if definition:
                                new_rules.append(':'.join([tag, definition]))

                            continue

                    new_rules.append(rule)

                acl_val = ','.join(new_rules)
                # BUGFIX: informational message was logged at ERROR level
                self.logger.debug("new acl: {0}".format(acl_val))

                if acl_val:
                    # Set the new sys.acl xattr
                    fmutable = ''.join([
                        url.protocol, "://", url.hostid, "//proc/user/?",
                        "mgm.cmd=attr&mgm.subcmd=set&mgm.attr.key=sys.acl",
                        "&mgm.attr.value=", acl_val, "&mgm.path=", dir_path
                    ])
                    (status, __, stderr) = exec_cmd(fmutable)

                    if not status:
                        err_msg = "Error making dir={0} mutable, msg={1}".format(
                            dir_path, stderr)
                        self.logger.error(err_msg)
                        raise IOError(err_msg)
                else:
                    # sys.acl empty, remove it from the xattrs
                    frmattr = ''.join([
                        url.protocol, "://", url.hostid, "//proc/user/?",
                        "mgm.cmd=attr&mgm.subcmd=rm&mgm.attr.key=sys.acl",
                        "&mgm.path=", dir_path
                    ])
                    (status, __, stderr) = exec_cmd(frmattr)

                    if not status:
                        err_msg = (
                            "Error removing xattr=sys.acl for dir={0}, msg={1}"
                            "").format(dir_path, stderr)
                        self.logger.error(err_msg)
                        raise IOError(err_msg)
Exemple #13
0
def main():
    """ Main function.

    Parses the command-line arguments, validates the source/destination
    XRootD URLs and access rights, then reconstructs and uploads the archive
    file so the recall can be done with the EOS archiving tool.
    """
    parser = argparse.ArgumentParser(
        description="Tool used to create an archive "
        "file from an already existing archive such "
        "that the recall of the files can be done "
        "using the EOS archiving tool. The files are "
        "copied back to EOS using the 2replica layout.")
    parser.add_argument(
        "-s",
        "--src",
        required=True,
        help="XRootD URL to archive tape source (CASTOR location)")
    parser.add_argument(
        "-d",
        "--dst",
        required=True,
        help="XRootD URL to archive disk destination (EOS location)")
    parser.add_argument(
        "-c",
        "--svc_class",
        default="default",
        help="Service class used for getting the files from tape")
    parser.add_argument("-u", "--uid", default="0", help="User UID (numeric)")
    parser.add_argument("-g", "--gid", default="0", help="User GID (numeric)")
    parser.add_argument("-x",
                        "--skip_no_xs",
                        default=False,
                        action="store_true",
                        help="Skip files that don't have a checksum")
    args = parser.parse_args()

    try:
        int(args.uid)
        int(args.gid)
    except ValueError:
        print("Error: UID/GID must be in numeric format", file=sys.stderr)
        sys.exit(errno.EINVAL)

    # Make sure the source and destination are directories
    if args.src[-1] != '/':
        args.src += '/'
    if args.dst[-1] != '/':
        args.dst += '/'

    # Check the source and destination are valid XRootD URLs
    url_dst = client.URL(args.dst)
    url_src = client.URL(args.src)

    if not url_dst.is_valid() or not url_src.is_valid():
        print("Error: Destination/Source URL is not valid", file=sys.stderr)
        sys.exit(errno.EINVAL)

    # Loopback hostnames would break the transfers - require FQDNs
    avoid_local = [
        "localhost", "localhost4", "localhost6", "localhost.localdomain",
        "localhost4.localdomain4", "localhost6.localdomain6"
    ]

    if url_dst.hostname in avoid_local or url_src.hostname in avoid_local:
        print("Please use FQDNs in the XRootD URLs", file=sys.stderr)
        sys.exit(errno.EINVAL)

    try:
        check_eos_access(url_dst)
    except EosAccessException as err:
        print("Error: {0}".format(str(err)), file=sys.stderr)
        sys.exit(errno.EPERM)

    archr = ArchReconstruct(url_src, url_dst, args)

    try:
        archr.breadth_first()
        archr.upload_archive()
    except (TapeAccessException, IOError) as err:
        print("Error: {0}".format(str(err)), file=sys.stderr)
        sys.exit(errno.EIO)
Exemple #14
0
    def archive_tx_clean(self, check_ok):
        """ Clean the transfer by renaming the archive file in EOS adding the
        following extensions:
        .done - the transfer was successful
        .err  - there were errors during the transfer. These are logged in the
             file .archive.log in the same directory.

        Also copies the local log file back to EOS (owned by the client that
        triggered the archive), removes local transfer files and joins the
        async status thread.

        Args:
            check_ok (bool): True if no error occured during transfer,
                otherwise false.

        Raises:
            IOError: Failed setting ownership of the log file in EOS.
        """
        # Rename arch file in EOS to reflect the status
        if not check_ok:
            eosf_rename = ''.join(
                [self.efile_root, self.config.ARCH_FN, ".", self.oper, ".err"])
        else:
            eosf_rename = ''.join([
                self.efile_root, self.config.ARCH_FN, ".", self.oper, ".done"
            ])

        old_url = client.URL(self.efile_full.encode("utf-8"))
        new_url = client.URL(eosf_rename.encode("utf-8"))
        frename = ''.join([
            old_url.protocol, "://", old_url.hostid, "//proc/user/?",
            "mgm.cmd=file&mgm.subcmd=rename&mgm.path=", old_url.path,
            "&mgm.file.source=", old_url.path, "&mgm.file.target=",
            new_url.path
        ])
        (status, __, stderr) = exec_cmd(frename)

        if not status:
            err_msg = ("Failed to rename {0} to {1}, msg={2}"
                       "").format(self.efile_full, eosf_rename, stderr)
            self.logger.error(err_msg)
            # TODO: raise IOError
        else:
            # For successful delete operations remove also the archive file
            if self.oper == self.config.DELETE_OP and check_ok:
                fs = client.FileSystem(self.efile_full.encode("utf-8"))
                # Encode as utf-8 like every other fs.rm call in this module
                st_rm, __ = fs.rm(
                    (new_url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

                if not st_rm.ok:
                    warn_msg = "Failed to delete archive {0}".format(
                        new_url.path)
                    self.logger.warning(warn_msg)

        # Copy local log file back to EOS directory and set the ownership to the
        # identity of the client who triggered the archive
        dir_root = self.efile_root[self.efile_root.rfind('//') + 1:]
        eos_log = ''.join([
            old_url.protocol, "://", old_url.hostid, "/", dir_root,
            self.config.ARCH_FN, ".log?eos.ruid=0&eos.rgid=0"
        ])

        self.logger.debug("Copy log:{0} to {1}".format(self.config.LOG_FILE,
                                                       eos_log))
        self.config.handler.flush()
        cp_client = client.FileSystem(self.efile_full.encode("utf-8"))
        st, __ = cp_client.copy(self.config.LOG_FILE, eos_log, force=True)

        if not st.ok:
            self.logger.error(("Failed to copy log file {0} to EOS at {1}"
                               "").format(self.config.LOG_FILE, eos_log))
        else:
            # User triggering archive operation owns the log file
            eos_log_url = client.URL(eos_log)
            fs = client.FileSystem(eos_log.encode("utf-8"))
            arg = ''.join([
                eos_log_url.path, "?eos.ruid=0&eos.rgid=0&mgm.pcmd=chown&uid=",
                self.uid, "&gid=", self.gid
            ])
            xrd_st, __ = fs.query(QueryCode.OPAQUEFILE, arg.encode("utf-8"))

            if not xrd_st.ok:
                err_msg = ("Failed setting ownership of the log file in"
                           " EOS: {0}").format(eos_log)
                self.logger.error(err_msg)
                raise IOError(err_msg)
            else:
                # Delete log if successfully copied to EOS and changed ownership
                try:
                    os.remove(self.config.LOG_FILE)
                except OSError:
                    pass

        # Delete all local files associated with this transfer
        try:
            os.remove(self.tx_file)
        except OSError:
            pass

        # Join async status thread
        self.thread_status.do_finish()
        self.thread_status.join()
Exemple #15
0
def set_dir_info(surl, dict_dinfo, excl_xattr):
    """ Set directory metadata information in EOS.

    Performs chown, chmod and extended-attribute synchronization for the
    directory through the EOS /proc/user/ interface via exec_cmd.

    Args:
        surl (string): Full URL of directory
        dict_dinfo (dict): Dictionary containing meta-data information
        excl_xattr (list): List of excluded extended attributes

    Raises:
        IOError: Metadata operation failed.
    """
    url = client.URL(surl.encode("utf-8"))

    # Change ownership of the directory
    fsetowner = ''.join([url.protocol, "://", url.hostid, "//proc/user/?",
                         "mgm.cmd=chown&mgm.path=", seal_path(url.path),
                         "&mgm.chown.owner=", dict_dinfo['uid'], ":",
                         dict_dinfo['gid']])
    (status, stdout, stderr) = exec_cmd(fsetowner)

    if not status:
        err_msg = "Dir={0}, error doing chown, msg={1}".format(url.path, stderr)
        logger.error(err_msg)
        raise IOError(err_msg)

    # Set permission on the directory
    fchmod = ''.join([url.protocol, "://", url.hostid, "//proc/user/?",
                      "mgm.cmd=chmod&mgm.path=", seal_path(url.path),
                      "&mgm.chmod.mode=", dict_dinfo['mode']])
    (status, stdout, stderr) = exec_cmd(fchmod)

    if not status:
        err_msg = "Dir={0}, error doing chmod, msg={1}".format(url.path, stderr)
        logger.error(err_msg)
        raise IOError(err_msg)

    # Deal with extended attributes. If all are excluded then don't touch them.
    if "*" in excl_xattr:
        return

    # Get all the current xattrs
    flsattr = ''.join([url.protocol, "://", url.hostid, "//proc/user/?",
                       "mgm.cmd=attr&mgm.subcmd=ls&mgm.path=", seal_path(url.path)])

    (status, stdout, stderr) = exec_cmd(flsattr)

    if not status:
        err_msg = "Dir={0}, error listing xattrs, msg ={1}".format(
            url.path, stderr)
        logger.error(err_msg)
        raise IOError(err_msg)

    # Each output line is "key=value"; keep only the attribute keys
    lattrs = [s.split('=', 1)[0] for s in stdout.splitlines()]

    for attr in lattrs:
        # Don't remove the excluded xattrs
        if attr in excl_xattr:
            continue

        frmattr = ''.join([url.protocol, "://", url.hostid, "//proc/user/?",
                           "mgm.cmd=attr&mgm.subcmd=rm&mgm.attr.key=", attr,
                           "&mgm.path=", seal_path(url.path)])
        (status, __, stderr) = exec_cmd(frmattr)

        if not status:
            err_msg = ("Dir={0} error while removing attr={1}, msg={2}"
                       "").format(url.path, attr, stderr)
            logger.error(err_msg)
            raise IOError(err_msg)

    # Set the expected extended attributes
    dict_dattr = dict_dinfo['attr']

    # Use items() instead of the Python2-only iteritems() so the code also
    # runs under Python 3 (behavior is identical under Python 2)
    for key, val in dict_dattr.items():
        # Don't set the excluded xattrs
        if key in excl_xattr:
            continue

        fsetattr = ''.join([url.protocol, "://", url.hostid, "//proc/user/?",
                            "mgm.cmd=attr&mgm.subcmd=set&mgm.attr.key=", key,
                            "&mgm.attr.value=", val, "&mgm.path=", seal_path(url.path)])
        (status, __, stderr) = exec_cmd(fsetattr)

        if not status:
            err_msg = "Dir={0}, error setting attr={1}, msg={2}".format(
                url.path, key, stderr)
            logger.error(err_msg)
            raise IOError(err_msg)
Exemple #16
0
    def copy_files(self, err_entry=None, found_checkpoint=False):
        """ Queue and flush the copy jobs for all files in the archive.

        Note that when doing PUT the layout is not conserved. Therefore, a file
        with 3 replicas will end up as just a simple file in the new location.

        Args:
            err_entry (list): Entry record from the archive file corresponding
                 to the first file/dir that was corrupted.
            found_checkpoint (boolean): If True it means the checkpoint was
                 already found and we don't need to search for it.

        Raises:
            IOError: Copy request failed.
        """
        indx_file = 0
        # For initial PUT also copy the archive file itself to tape
        if self.init_put:
            # The archive init file is already renamed to archive.put.err at
            # this point and we need to take this into consideration when
            # transferring it
            url = client.URL(self.efile_full.encode("utf-8"))
            eos_fs = client.FileSystem(self.efile_full.encode("utf-8"))
            st_stat, resp = eos_fs.stat(url.path.encode("utf-8"))

            if st_stat.ok:
                # Queue the archive file as a transfer job, reading as root
                # so that permissions cannot block the copy
                __, dst = self.archive.get_endpoints(self.config.ARCH_INIT)
                self.list_jobs.append(
                    (self.efile_full + "?eos.ruid=0&eos.rgid=0" +
                     "&eos.app=archive", dst, resp.size))
            else:
                err_msg = ''.join([
                    "Failed to get init archive file info, msg=",
                    st_stat.message
                ])
                self.logger.error(err_msg)
                raise IOError(err_msg)

        # Copy files
        for fentry in self.archive.files():
            # Search for the recovery checkpoint: skip entries up to (but not
            # including) the first corrupted one when retrying
            if self.do_retry and not found_checkpoint:
                if fentry != err_entry:
                    indx_file += 1
                    continue
                else:
                    found_checkpoint = True

            indx_file += 1
            msg = "copy file {0}/{1}".format(indx_file,
                                             self.archive.header['num_files'])
            self.set_status(msg)
            src, dst = self.archive.get_endpoints(fentry[1])
            # Map the per-file metadata columns onto their header field names
            dfile = dict(zip(self.archive.header['file_meta'], fentry[2:]))

            # Copy file
            if not self.archive.d2t:
                # For GET we also have the dictionary with the metadata

                dst = ''.join([
                    dst, "?eos.ctime=", dfile['ctime'], "&eos.mtime=",
                    dfile['mtime'], "&eos.bookingsize=", dfile['size'],
                    "&eos.targetsize=", dfile['size'],
                    "&eos.ruid=0&eos.rgid=0&eos.app=archive"
                ])

                # If checksum 0 don't enforce it
                if dfile['xs'] != "0":
                    dst = ''.join([dst, "&eos.checksum=", dfile['xs']])

                # For backup we try to read as root from the source
                if self.oper == self.config.BACKUP_OP:
                    if '?' in src:
                        src = ''.join(
                            [src, "&eos.ruid=0&eos.rgid=0&eos.app=archive"])
                    else:
                        src = ''.join(
                            [src, "?eos.ruid=0&eos.rgid=0&eos.app=archive"])

                    # If this is a version file we save it as a 2-replica layout
                    if is_version_file(fentry[1]):
                        dst = ''.join([
                            dst, "&eos.layout.checksum=", dfile['xstype'],
                            "&eos.layout.type=replica&eos.layout.nstripes=2"
                        ])

                    # If time window specified then select only the matching entries
                    if (self.archive.header['twindow_type']
                            and self.archive.header['twindow_val']):
                        twindow_sec = int(self.archive.header['twindow_val'])
                        tentry_sec = int(
                            float(dfile[self.archive.header['twindow_type']]))

                        # Entry older than the window start -> skip it
                        if tentry_sec < twindow_sec:
                            continue
            else:
                # For PUT read the files from EOS as root
                src = ''.join([src, "?eos.ruid=0&eos.rgid=0&eos.app=archive"])

            self.logger.info("Copying from {0} to {1}".format(src, dst))
            self.list_jobs.append((src, dst, dfile['size']))

            # Flush once a full batch of jobs is queued
            if len(self.list_jobs) >= self.config.BATCH_SIZE:
                st = self.flush_files(False)

                # For archives we fail immediately, for backups it's best-effort
                if not st and self.oper != self.config.BACKUP_OP:
                    err_msg = "Failed to flush files"
                    self.logger.error(err_msg)
                    raise IOError(err_msg)

        # Flush all pending copies and set metadata info for GET operation
        st = self.flush_files(True)

        if not st and self.oper != self.config.BACKUP_OP:
            err_msg = "Failed to flush files"
            self.logger.error(err_msg)
            raise IOError(err_msg)
Exemple #17
0
    def prepare2get(self, err_entry=None, found_checkpoint=False):
        """This method is only executed for GET operations and its purpose is
        to issue the Prepare2Get commands for the files in the archive which
        will later on be copied back to EOS.

        Args:
            err_entry (list): Entry record from the archive file corresponding
                 to the first file/dir that was corrupted.
            found_checkpoint (bool): If True it means the checkpoint was
                 already found and we don't need to search for it.

        Raises:
            IOError: The Prepare2Get request failed.
        """
        # Nothing to stage when archiving to tape (PUT direction)
        if self.archive.d2t:
            return

        count = 0
        limit = 50  # max files per prepare request
        oper = 'prepare'
        self.set_status("prepare2get")
        t0 = time.time()
        lpaths = []
        status = True
        metahandler = MetaHandler()

        for fentry in self.archive.files():
            # Find error checkpoint if not already found
            if err_entry and not found_checkpoint:
                if fentry != err_entry:
                    continue
                else:
                    found_checkpoint = True

            count += 1
            surl, __ = self.archive.get_endpoints(fentry[1])
            # Strip the protocol/host prefix, keep only the path part
            lpaths.append(surl[surl.rfind('//') + 1:])

            if len(lpaths) == limit:
                xrd_st = self.archive.fs_dst.prepare(
                    lpaths,
                    PrepareFlags.STAGE,
                    callback=metahandler.register(oper, surl))

                if not xrd_st.ok:
                    # Drain outstanding callbacks before raising
                    __ = metahandler.wait(oper)
                    err_msg = "Failed prepare2get for path={0}".format(surl)
                    self.logger.error(err_msg)
                    raise IOError(err_msg)

                # Wait for batch to be executed
                del lpaths[:]
                status = status and metahandler.wait(oper)
                self.logger.debug(
                    ("Prepare2get done count={0}/{1}"
                     "").format(count, self.archive.header['num_files']))

                if not status:
                    break

        # Send the remaining requests
        if lpaths and status:
            xrd_st = self.archive.fs_dst.prepare(lpaths,
                                                 PrepareFlags.STAGE,
                                                 callback=metahandler.register(
                                                     oper, surl))

            if not xrd_st.ok:
                # Drain outstanding callbacks before raising
                __ = metahandler.wait(oper)
                err_msg = "Failed prepare2get"
                self.logger.error(err_msg)
                raise IOError(err_msg)

            # Wait for batch to be executed
            del lpaths[:]
            status = status and metahandler.wait(oper)

        if status:
            t1 = time.time()
            self.logger.info("TIMING_prepare2get={0} sec".format(t1 - t0))
        else:
            err_msg = "Failed prepare2get"
            self.logger.error(err_msg)
            raise IOError(err_msg)

        # Wait for all the files to be on disk
        for fentry in self.archive.files():
            surl, __ = self.archive.get_endpoints(fentry[1])
            url = client.URL(surl.encode("utf-8"))

            while True:
                st_stat, resp_stat = self.archive.fs_dst.stat(
                    url.path.encode("utf-8"))

                if not st_stat.ok:
                    err_msg = "Error stat entry={0}".format(surl)
                    self.logger.error(err_msg)
                    raise IOError()

                # Check if file is on disk
                if resp_stat.flags & StatInfoFlags.OFFLINE:
                    # Still only on tape -- poll again in 5 seconds
                    self.logger.info(
                        "Sleep 5 seconds, file not on disk entry={0}".format(
                            surl))
                    sleep(5)
                else:
                    break

        self.logger.info("Finished prepare2get, all files are on disk")
Exemple #18
0
    def update_file_access(self, err_entry=None, found_checkpoint=False):
        """ Restore the ownership, the permissions and the timestamps of the
        files copied back to EOS. Only relevant for GET operations i.e. when
        self.archive.d2t == False.

        Args:
           err_entry (list): Entry record from the archive file corresponding
               to the first file/dir that was corrupted.
           found_checkpoint (boolean): If True, it means the checkpoint was
                 already found and we don't need to search for it i.e. the
                 corrupted entry is a directory.

        Raises:
            IOError: chown or chmod operations failed
        """
        if self.archive.d2t:
            return

        self.set_status("updating file access")
        t0 = time.time()
        oper = 'query'
        metahandler = MetaHandler()
        fs = self.archive.fs_src

        for fentry in self.archive.files():
            # For backup with a time window, touch only the matching entries
            if self.oper == self.config.BACKUP_OP:
                hdr = self.archive.header

                if hdr['twindow_type'] and hdr['twindow_val']:
                    dfile = dict(zip(hdr['file_meta'], fentry[2:]))
                    win_sec = int(hdr['twindow_val'])
                    entry_sec = int(float(dfile[hdr['twindow_type']]))

                    if entry_sec < win_sec:
                        continue

            # Skip entries until the recovery checkpoint is reached
            if err_entry and not found_checkpoint:
                if fentry == err_entry:
                    found_checkpoint = True
                else:
                    continue

            __, surl = self.archive.get_endpoints(fentry[1])
            url = client.URL(surl.encode("utf-8"))
            dict_meta = dict(zip(self.archive.header['file_meta'], fentry[2:]))

            def _send_query(cmd, arg):
                # Fire an async opaque query; raise IOError if the submission
                # itself fails (after draining outstanding callbacks)
                xrd_st = fs.query(QueryCode.OPAQUEFILE,
                                  arg.encode("utf-8"),
                                  callback=metahandler.register(oper, surl))

                if not xrd_st.ok:
                    __ = metahandler.wait(oper)
                    err_msg = "Failed query {0} for path={1}".format(cmd, surl)
                    self.logger.error(err_msg)
                    raise IOError(err_msg)

            # Async chown request restoring the original owner
            _send_query("chown", ''.join([
                url.path, "?eos.ruid=0&eos.rgid=0&mgm.pcmd=chown&uid=",
                dict_meta['uid'], "&gid=", dict_meta['gid']
            ]))

            # Async chmod request; mode is saved in octal format
            _send_query("chmod", ''.join([
                url.path, "?eos.ruid=0&eos.rgid=0&mgm.pcmd=chmod&mode=",
                str(int(dict_meta['mode'], 8))
            ]))

            # Async utimes request restoring ctime and mtime
            ctime_sec, ctime_nsec = dict_meta['ctime'].split('.', 1)
            mtime_sec, mtime_nsec = dict_meta['mtime'].split('.', 1)
            _send_query("utimes", ''.join([
                url.path, "?eos.ruid=0&eos.rgid=0&mgm.pcmd=utimes",
                "&tv1_sec=", ctime_sec, "&tv1_nsec=", ctime_nsec, "&tv2_sec=",
                mtime_sec, "&tv2_nsec=", mtime_nsec
            ]))

        # Wait for all outstanding query callbacks to finish
        if metahandler.wait(oper):
            self.logger.info("TIMING_update_file_access={0} sec".format(
                time.time() - t0))
        else:
            err_msg = "Failed update file access"
            self.logger.error(err_msg)
            raise IOError(err_msg)
Exemple #19
0
 def __init__(self, path):
     self._path = path
     self._url = client.URL(path)
     assert self._url.is_valid(), path