Exemple #1
0
    def mkdir(self, dentry):
        """ Make sure the directory described by the entry exists and, for
        GET operations, apply the metadata recorded in the archive file.

        Args:
            dentry (list): Directory entry as read from the archive file.

        Raises:
            IOError: Directory creation failed or, for GET operations, the
                     metadata could not be set.
        """
        __, surl = self.get_endpoints(dentry[1])
        fs = self.get_fs(surl)
        url = client.URL(surl.encode("utf-8"))
        # Path carrying the eos.ruid/eos.rgid opaque information
        opaque_path = (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8")

        # A failed stat is taken to mean that the directory must be created
        st, __ = fs.stat(opaque_path)

        if not st.ok:
            # Only non-d2t transfers attach the opaque information on mkdir
            if self.d2t:
                target = (url.path).encode("utf-8")
            else:
                target = opaque_path

            st, __ = fs.mkdir(target)

            if not st.ok:
                err_msg = "Dir={0} failed mkdir errmsg={1}, errno={2}, code={3}".format(
                    surl, st.message.decode("utf-8"), st.errno, st.code)
                self.logger.error(err_msg)
                raise IOError(err_msg)

        # Nothing more to do unless this is a GET operation
        if self.d2t:
            return

        # Pair the metadata keys from the header with the entry values
        dir_info = dict(zip(self.header['dir_meta'], dentry[2:]))

        # The list of excluded extended attributes is optional in the header
        try:
            excluded = self.header['excl_xattr']
        except KeyError:
            excluded = []

        try:
            set_dir_info(surl, dir_info, excluded)
        except IOError:
            err_msg = "Dir={0} failed setting metadata".format(surl)
            self.logger.error(err_msg)
            raise IOError(err_msg)
Exemple #2
0
    def mkdir(self, dentry):
        """ Make sure the directory described by the entry exists and, for
        GET operations, apply the metadata recorded in the archive file.

        Args:
            dentry (list): Directory entry as read from the archive file.

        Raises:
            IOError: Directory creation failed or, for GET operations, the
                     metadata could not be set.
        """
        __, surl = self.get_endpoints(dentry[1])
        fs = self.get_fs(surl)
        url = client.URL(surl.encode("utf-8"))
        # Path carrying the eos.ruid/eos.rgid opaque information, used for
        # both the stat and the mkdir request
        opaque_path = (url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8")

        # A failed stat is taken to mean that the directory must be created
        st, __ = fs.stat(opaque_path)

        if not st.ok:
            st, __ = fs.mkdir(opaque_path)

            if not st.ok:
                err_msg = "Dir={0} failed mkdir errmsg={1}, errno={2}, code={3}".format(
                    surl, st.message.decode("utf-8"), st.errno, st.code)
                self.logger.error(err_msg)
                raise IOError(err_msg)

        # Nothing more to do unless this is a GET operation
        if self.d2t:
            return

        # Pair the metadata keys from the header with the entry values
        dir_info = dict(zip(self.header['dir_meta'], dentry[2:]))

        # The list of excluded extended attributes is optional in the header
        try:
            excluded = self.header['excl_xattr']
        except KeyError:
            excluded = []

        try:
            set_dir_info(surl, dir_info, excluded)
        except IOError:
            err_msg = "Dir={0} failed setting metadata".format(surl)
            self.logger.error(err_msg)
            raise IOError(err_msg)
Exemple #3
0
    def breadth_first(self):
        """ Traverse the filesystem subtree using breadth-first search and
        collect the directory information and file information into separate
        files which will be merged in the end.

        The root directory entry is written first and its metadata is applied
        to the destination through set_dir_info. Directory entries are written
        to self.fdirs and file entries to self.ffiles. When the traversal is
        done the header, followed by all directory entries and then all file
        entries, is written to self.farchive which is closed afterwards.

        Raises:
            TapeAccessException: A directory could not be listed, an
                ".archive.init" entry was found in the subtree, a checksum
                query failed (and self.skip_no_xs is not set), the checksum
                response is malformed or the checksum type is not adler32.
        """
        # Imported locally so that the method does not depend on a
        # module-level import
        from collections import deque

        # Dir format: type, rel_path, uid, gid, mode, attr
        dir_meta = '["uid", "gid", "mode", "attr"]'
        dir_format = '["d", "{0}", "{1}", "{2}", "{3}", {4}]'
        # File format: type, rel_path, size, mtime, ctime, uid, gid, mode, xstype, xs
        # Fake mtime and ctime subsecond precision
        file_meta = ('["size", "mtime", "ctime", "uid", "gid", "mode", '
                     '"xstype", "xs"]')
        file_format = ('["f", "{0}", "{1}", "{2}.0", "{3}.0", "{4}", '
                       '"{5}", "{6}", "{7}", "{8}"]')
        # Attrs for 2 replica layout in EOS with current user the only one
        # allowed to trigger archiving operations
        replica_attr = ('{{"sys.acl": "u:{0}:a,z:i", '
                        '"sys.forced.blockchecksum": "crc32c", '
                        '"sys.forced.blocksize": "4k", '
                        '"sys.forced.checksum": "adler", '
                        '"sys.forced.layout": "replica", '
                        '"sys.forced.nstripes": "2", '
                        '"sys.forced.space": "default"}}').format(self.uid)
        num_files, num_dirs = 0, 0
        fs = client.FileSystem(str(self.src_url))

        # Add root directory which is a bit special and set its metadata
        # Dir mode is 42755 and file mode is 0644. The octal strings are built
        # with format() because the output of oct() differs between Python 2
        # ("042755") and Python 3 ("0o42755").
        dir_mode = "{0:o}".format(stat.S_IFDIR | stat.S_ISGID | stat.S_IRWXU
                                  | stat.S_IRGRP | stat.S_IXGRP
                                  | stat.S_IROTH | stat.S_IXOTH)
        file_mode = "0{0:o}".format(stat.S_IRUSR | stat.S_IWUSR
                                    | stat.S_IRGRP | stat.S_IROTH)
        print(dir_format.format("./", self.uid, self.gid, dir_mode,
                                replica_attr), file=self.fdirs)
        dict_attr = ast.literal_eval(replica_attr)
        dict_dinfo = dict(zip(["uid", "gid", "mode", "attr"],
                              [self.uid, self.gid, dir_mode, dict_attr]))
        set_dir_info(str(self.dst_url), dict_dinfo, list())
        # NOTE(review): entry names are appended to the directory path without
        # a separator, so root_path presumably ends with '/' - confirm
        root_path = self.src_url.path
        root_len = len(root_path)
        # FIFO queue of the directories which still need to be listed
        pending_dirs = deque([root_path])

        while pending_dirs:
            path = pending_dirs.popleft()
            st, listing = fs.dirlist(path, DirListFlags.STAT)

            if not st.ok:
                # Report the directory which actually failed, not the root
                msg = "Failed to list dir={0}".format(path)
                raise TapeAccessException(msg)

            for elem in listing:
                if elem.name == ".archive.init":
                    msg = ("Trying to reconstruct an already existing archive "
                           "in directory: {0}").format(path)
                    raise TapeAccessException(msg)

                if elem.statinfo.flags & StatInfoFlags.IS_DIR:
                    num_dirs += 1
                    full_path = ''.join([path, elem.name, '/'])
                    # Every full path is built starting from root_path so only
                    # the leading prefix must be removed; str.replace would
                    # also drop any later occurrence of the same string
                    rel_path = full_path[root_len:]
                    pending_dirs.append(full_path)
                    print(dir_format.format(rel_path, self.uid, self.gid,
                                            dir_mode, replica_attr),
                          file=self.fdirs)
                else:
                    full_path = ''.join([path, elem.name])
                    rel_path = full_path[root_len:]
                    st, xs_resp = fs.query(QueryCode.CHECKSUM, full_path)

                    if not st.ok:
                        # If requested then skip the files that don't have a checksum
                        if self.skip_no_xs:
                            continue

                        msg = "File={0} failed xs query".format(full_path)
                        raise TapeAccessException(msg)

                    # Result has an annoying \x00 character at the end and it
                    # contains the xs type (adler32) and the xs value
                    resp = xs_resp.strip('\x00\0\n ').split()

                    if len(resp) < 2:
                        msg = ("File={0} invalid xs response"
                               "".format(full_path))
                        raise TapeAccessException(msg)

                    xs_type, xs_val = resp[0], resp[1]

                    if xs_type != "adler32":
                        msg = ("Unknown checksum type={0} from tape system"
                               "".format(xs_type))
                        raise TapeAccessException(msg)

                    # If checksum value is shorter than 8 chars it is padded
                    # on the left with zeros
                    xs_val = "{0:0>8}".format(xs_val)
                    num_files += 1
                    # The modification time is used for both mtime and ctime
                    print(file_format.format(rel_path, elem.statinfo.size,
                                             elem.statinfo.modtime,
                                             elem.statinfo.modtime,
                                             self.uid, self.gid, file_mode,
                                             "adler", xs_val),
                          file=self.ffiles)

        # Write archive file header
        header_format = ('{{"src": "{0}", '
                         '"dst": "{1}", '
                         '"svc_class": "{2}", '
                         '"dir_meta": {3}, '
                         '"file_meta": {4}, '
                         '"num_dirs": {5}, '
                         '"num_files": {6}, '
                         '"uid": "{7}", '
                         '"gid": "{8}", '
                         '"timestamp": "{9}"}}')
        # NOTE(review): "src" is filled from dst_url and "dst" from src_url,
        # unchanged from the previous implementation; presumably the header
        # describes the direction of the original archive operation - confirm
        print(header_format.format(str(self.dst_url), str(self.src_url),
                                   self.svc_class, dir_meta, file_meta,
                                   num_dirs, num_files, self.uid,
                                   self.gid, time.time()),
              file=self.farchive, end="\n")
        # Rewind to the beginning of the tmp files
        self.fdirs.seek(0)
        self.ffiles.seek(0)

        # Write directories
        for line in self.fdirs:
            print(line, file=self.farchive, end="")

        # Write files
        for line in self.ffiles:
            print(line, file=self.farchive, end="")

        self.farchive.close()
Exemple #4
0
    def breadth_first(self):
        """ Traverse the filesystem subtree using breadth-first search and
        collect the directory information and file information into separate
        files which will be merged in the end.

        The root directory entry is written first and its metadata is applied
        to the destination through set_dir_info. Directory entries are written
        to self.fdirs and file entries to self.ffiles. When the traversal is
        done the header, followed by all directory entries and then all file
        entries, is written to self.farchive which is closed afterwards.

        Raises:
            TapeAccessException: A directory could not be listed, an
                ".archive.init" entry was found in the subtree, a checksum
                query failed (and self.skip_no_xs is not set), the checksum
                response is malformed or the checksum type is not adler32.
        """
        # Imported locally so that the method does not depend on a
        # module-level import
        from collections import deque

        # Dir format: type, rel_path, uid, gid, mode, attr
        dir_meta = '["uid", "gid", "mode", "attr"]'
        dir_format = '["d", "{0}", "{1}", "{2}", "{3}", {4}]'
        # File format: type, rel_path, size, mtime, ctime, uid, gid, mode, xstype, xs
        # Fake mtime and ctime subsecond precision
        file_meta = (
            '["size", "mtime", "ctime", "uid", "gid", "mode", '
            '"xstype", "xs"]')
        file_format = (
            '["f", "{0}", "{1}", "{2}.0", "{3}.0", "{4}", '
            '"{5}", "{6}", "{7}", "{8}"]')
        # Attrs for 2 replica layout in EOS with current user the only one
        # allowed to trigger archiving operations
        replica_attr = ('{{"sys.acl": "u:{0}:a,z:i", '
                        '"sys.forced.blockchecksum": "crc32c", '
                        '"sys.forced.blocksize": "4k", '
                        '"sys.forced.checksum": "adler", '
                        '"sys.forced.layout": "replica", '
                        '"sys.forced.nstripes": "2", '
                        '"sys.forced.space": "default"}}').format(self.uid)
        num_files, num_dirs = 0, 0
        fs = client.FileSystem(str(self.src_url))

        # Add root directory which is a bit special and set its metadata
        # Dir mode is 42755 and file mode is 0644. The octal strings are built
        # with format() because the output of oct() differs between Python 2
        # ("042755") and Python 3 ("0o42755").
        dir_mode = "{0:o}".format(stat.S_IFDIR | stat.S_ISGID | stat.S_IRWXU
                                  | stat.S_IRGRP | stat.S_IXGRP
                                  | stat.S_IROTH | stat.S_IXOTH)
        file_mode = "0{0:o}".format(stat.S_IRUSR | stat.S_IWUSR
                                    | stat.S_IRGRP | stat.S_IROTH)
        print(dir_format.format("./", self.uid, self.gid, dir_mode,
                                replica_attr),
              file=self.fdirs)
        dict_attr = ast.literal_eval(replica_attr)
        dict_dinfo = dict(
            zip(["uid", "gid", "mode", "attr"],
                [self.uid, self.gid, dir_mode, dict_attr]))
        set_dir_info(str(self.dst_url), dict_dinfo, list())
        # NOTE(review): entry names are appended to the directory path without
        # a separator, so root_path presumably ends with '/' - confirm
        root_path = self.src_url.path
        root_len = len(root_path)
        # FIFO queue of the directories which still need to be listed
        pending_dirs = deque([root_path])

        while pending_dirs:
            path = pending_dirs.popleft()
            st, listing = fs.dirlist(path, DirListFlags.STAT)

            if not st.ok:
                # Report the directory which actually failed, not the root
                msg = "Failed to list dir={0}".format(path)
                raise TapeAccessException(msg)

            for elem in listing:
                if elem.name == ".archive.init":
                    msg = ("Trying to reconstruct an already existing archive "
                           "in directory: {0}").format(path)
                    raise TapeAccessException(msg)

                if elem.statinfo.flags & StatInfoFlags.IS_DIR:
                    num_dirs += 1
                    full_path = ''.join([path, elem.name, '/'])
                    # Every full path is built starting from root_path so only
                    # the leading prefix must be removed; str.replace would
                    # also drop any later occurrence of the same string
                    rel_path = full_path[root_len:]
                    pending_dirs.append(full_path)
                    print(dir_format.format(rel_path, self.uid, self.gid,
                                            dir_mode, replica_attr),
                          file=self.fdirs)
                else:
                    full_path = ''.join([path, elem.name])
                    rel_path = full_path[root_len:]
                    st, xs_resp = fs.query(QueryCode.CHECKSUM, full_path)

                    if not st.ok:
                        # If requested then skip the files that don't have a checksum
                        if self.skip_no_xs:
                            continue

                        msg = "File={0} failed xs query".format(full_path)
                        raise TapeAccessException(msg)

                    # Result has an annoying \x00 character at the end and it
                    # contains the xs type (adler32) and the xs value
                    resp = xs_resp.strip('\x00\0\n ').split()

                    if len(resp) < 2:
                        msg = ("File={0} invalid xs response"
                               "".format(full_path))
                        raise TapeAccessException(msg)

                    xs_type, xs_val = resp[0], resp[1]

                    if xs_type != "adler32":
                        msg = ("Unknown checksum type={0} from tape system"
                               "".format(xs_type))
                        raise TapeAccessException(msg)

                    # If checksum value is shorter than 8 chars it is padded
                    # on the left with zeros
                    xs_val = "{0:0>8}".format(xs_val)
                    num_files += 1
                    # The modification time is used for both mtime and ctime
                    print(file_format.format(rel_path, elem.statinfo.size,
                                             elem.statinfo.modtime,
                                             elem.statinfo.modtime, self.uid,
                                             self.gid, file_mode, "adler",
                                             xs_val),
                          file=self.ffiles)

        # Write archive file header
        header_format = ('{{"src": "{0}", '
                         '"dst": "{1}", '
                         '"svc_class": "{2}", '
                         '"dir_meta": {3}, '
                         '"file_meta": {4}, '
                         '"num_dirs": {5}, '
                         '"num_files": {6}, '
                         '"uid": "{7}", '
                         '"gid": "{8}", '
                         '"timestamp": "{9}"}}')
        # NOTE(review): "src" is filled from dst_url and "dst" from src_url,
        # unchanged from the previous implementation; presumably the header
        # describes the direction of the original archive operation - confirm
        print(header_format.format(str(self.dst_url), str(self.src_url),
                                   self.svc_class, dir_meta, file_meta,
                                   num_dirs, num_files, self.uid, self.gid,
                                   time.time()),
              file=self.farchive,
              end="\n")
        # Rewind to the beginning of the tmp files
        self.fdirs.seek(0)
        self.ffiles.seek(0)

        # Write directories
        for line in self.fdirs:
            print(line, file=self.farchive, end="")

        # Write files
        for line in self.ffiles:
            print(line, file=self.farchive, end="")

        self.farchive.close()