def mkdir(self, dentry):
    """ Make sure the directory described by an archive entry exists and, for
    GET operations, apply the metadata recorded for it.

    Args:
        dentry (list): Directory entry as read from the archive file. Element
            1 is handed to get_endpoints(); the elements from index 2 onwards
            are paired with the names listed in header['dir_meta'].

    Raises:
        IOError: Directory creation or setting the metadata failed.
    """
    id_opaque = "?eos.ruid=0&eos.rgid=0"
    __, surl = self.get_endpoints(dentry[1])
    fs = self.get_fs(surl)
    url = client.URL(surl.encode("utf-8"))

    # The mkdir is attempted only when the stat does not succeed
    status, __ = fs.stat((url.path + id_opaque).encode("utf-8"))

    if not status.ok:
        # The opaque info is appended to the mkdir path only when d2t is unset
        if self.d2t:
            target = url.path
        else:
            target = url.path + id_opaque

        status, __ = fs.mkdir(target.encode("utf-8"))

        if not status.ok:
            template = "Dir={0} failed mkdir errmsg={1}, errno={2}, code={3}"
            err_msg = template.format(surl, status.message.decode("utf-8"),
                                      status.errno, status.code)
            self.logger.error(err_msg)
            raise IOError(err_msg)

    # Metadata is applied only for GET operations (d2t unset)
    if self.d2t:
        return

    dict_dinfo = dict(zip(self.header['dir_meta'], dentry[2:]))

    # The list of excluded extended attributes is optional in the header
    try:
        excl_xattr = self.header['excl_xattr']
    except KeyError:
        excl_xattr = []

    try:
        set_dir_info(surl, dict_dinfo, excl_xattr)
    except IOError:
        err_msg = "Dir={0} failed setting metadata".format(surl)
        self.logger.error(err_msg)
        raise IOError(err_msg)
def mkdir(self, dentry):
    """ Make sure the directory described by an archive entry exists and, for
    GET operations, apply the metadata recorded for it.

    Args:
        dentry (list): Directory entry as read from the archive file. Element
            1 is handed to get_endpoints(); the elements from index 2 onwards
            are paired with the names listed in header['dir_meta'].

    Raises:
        IOError: Directory creation or setting the metadata failed.
    """
    __, surl = self.get_endpoints(dentry[1])
    fs = self.get_fs(surl)
    url = client.URL(surl.encode("utf-8"))

    # Both the stat and the mkdir use the path with the same opaque info
    status, __ = fs.stat((url.path + "?eos.ruid=0&eos.rgid=0").encode("utf-8"))

    if not status.ok:
        # The mkdir is attempted only when the stat does not succeed
        mkdir_path = url.path + "?eos.ruid=0&eos.rgid=0"
        status, __ = fs.mkdir(mkdir_path.encode("utf-8"))

        if not status.ok:
            template = "Dir={0} failed mkdir errmsg={1}, errno={2}, code={3}"
            err_msg = template.format(surl, status.message.decode("utf-8"),
                                      status.errno, status.code)
            self.logger.error(err_msg)
            raise IOError(err_msg)

    # Metadata is applied only for GET operations (d2t unset)
    if self.d2t:
        return

    dict_dinfo = dict(zip(self.header['dir_meta'], dentry[2:]))

    # The list of excluded extended attributes is optional in the header
    try:
        excl_xattr = self.header['excl_xattr']
    except KeyError:
        excl_xattr = []

    try:
        set_dir_info(surl, dict_dinfo, excl_xattr)
    except IOError:
        err_msg = "Dir={0} failed setting metadata".format(surl)
        self.logger.error(err_msg)
        raise IOError(err_msg)
def breadth_first(self):
    """ Traverse the filesystem subtree using breadth-first search and collect
    the directory information and file information into separate temporary
    files which are merged into the archive file at the end.

    Directory entries are written to self.fdirs and file entries to
    self.ffiles. Once the traversal is done the header, the directory entries
    and the file entries are written, in this order, to self.farchive which is
    then closed. The metadata of the root directory is also applied to
    self.dst_url through set_dir_info().

    Raises:
        TapeAccessException: A directory listing failed, an ".archive.init"
            entry was found in the subtree, a checksum query failed (unless
            self.skip_no_xs is set, in which case the file is skipped), or
            the checksum response is malformed or not of type adler32.
    """
    # Dir format: type, rel_path, uid, gid, mode, attr
    dir_meta = "[\"uid\", \"gid\", \"mode\", \"attr\"]"
    dir_format = "[\"d\", \"{0}\", \"{1}\", \"{2}\", \"{3}\", {4}]"
    # File format: type, rel_path, size, mtime, ctime, uid, gid, mode, xstype, xs
    # Fake mtime and ctime subsecond precision
    file_meta = ("[\"size\", \"mtime\", \"ctime\", \"uid\", \"gid\", \"mode\", "
                 "\"xstype\", \"xs\"]")
    file_format = ("[\"f\", \"{0}\", \"{1}\", \"{2}.0\", \"{3}.0\", \"{4}\", "
                   "\"{5}\", \"{6}\", \"{7}\", \"{8}\"]")
    # Attrs for 2 replica layout in EOS with current user the only one
    # allowed to trigger archiving operations
    replica_attr = ("{{\"sys.acl\": \"u:{0}:a,z:i\", "
                    "\"sys.forced.blockchecksum\": \"crc32c\", "
                    "\"sys.forced.blocksize\": \"4k\", "
                    "\"sys.forced.checksum\": \"adler\", "
                    "\"sys.forced.layout\": \"replica\", "
                    "\"sys.forced.nstripes\": \"2\", "
                    "\"sys.forced.space\": \"default\"}}").format(self.uid)
    num_files, num_dirs = 0, 0
    fs = client.FileSystem(str(self.src_url))

    # Add root directory which is a bit special and set its metadata
    # Dir mode is 42755 and file mode is 0644. The octal strings are built
    # with the "o" format type rather than oct(), whose output differs
    # between Python 2 ("042755") and Python 3 ("0o42755"); slicing off one
    # character only gives the intended value on Python 2.
    dir_bits = (stat.S_IFDIR | stat.S_ISGID | stat.S_IRWXU | stat.S_IRGRP
                | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    dir_mode = "{0:o}".format(dir_bits)
    file_bits = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    file_mode = "0{0:o}".format(file_bits)

    print(dir_format.format("./", self.uid, self.gid, dir_mode, replica_attr),
          file=self.fdirs)
    dict_attr = ast.literal_eval(replica_attr)
    dict_dinfo = dict(zip(["uid", "gid", "mode", "attr"],
                          [self.uid, self.gid, dir_mode, dict_attr]))
    set_dir_info(str(self.dst_url), dict_dinfo, list())

    root_path = self.src_url.path
    root_len = len(root_path)
    lst_dirs = [root_path]

    while lst_dirs:
        path = lst_dirs.pop(0)
        st, listing = fs.dirlist(path, DirListFlags.STAT)

        if not st.ok:
            # Report the directory that actually failed, not the root
            msg = "Failed to list dir={0}".format(path)
            raise TapeAccessException(msg)

        for elem in listing:
            if elem.name == ".archive.init":
                msg = ("Trying to reconstruct an already existing archive "
                       "in directory: {0}").format(path)
                raise TapeAccessException(msg)

            if elem.statinfo.flags & StatInfoFlags.IS_DIR:
                num_dirs += 1
                full_path = ''.join([path, elem.name, '/'])
                # Every path is built by appending to root_path, so drop just
                # that prefix; str.replace() would also remove any later
                # occurrence of the root string inside the path.
                rel_path = full_path[root_len:]
                lst_dirs.append(full_path)
                print(dir_format.format(rel_path, self.uid, self.gid,
                                        dir_mode, replica_attr),
                      file=self.fdirs)
            else:
                full_path = ''.join([path, elem.name])
                rel_path = full_path[root_len:]
                st, xs_resp = fs.query(QueryCode.CHECKSUM, full_path)

                if not st.ok:
                    # If requested then skip the files that don't have a checksum
                    if self.skip_no_xs:
                        continue

                    msg = "File={0} failed xs query".format(full_path)
                    raise TapeAccessException(msg)

                num_files += 1
                # Result has an annoying \x00 character at the end and it
                # contains the xs type (adler32) and the xs value
                resp = xs_resp.strip('\x00\0\n ').split()

                # Fail with the module's own exception rather than a bare
                # IndexError when the response lacks the type or the value
                if len(resp) < 2:
                    msg = ("File={0} returned malformed xs response"
                           "").format(full_path)
                    raise TapeAccessException(msg)

                # If checksum value is not 8 char long then we need padding
                if len(resp[1]) != 8:
                    resp[1] = "{0:0>8}".format(resp[1])

                if resp[0] != "adler32":
                    msg = ("Unknown checksum type={0} from tape system"
                           "".format(resp[0]))
                    raise TapeAccessException(msg)

                # modtime is used for both the mtime and the ctime fields
                print(file_format.format(rel_path, elem.statinfo.size,
                                         elem.statinfo.modtime,
                                         elem.statinfo.modtime,
                                         self.uid, self.gid, file_mode,
                                         "adler", resp[1]),
                      file=self.ffiles)

    # Write archive file header
    # NOTE(review): "src" is filled from dst_url and "dst" from src_url -
    # presumably deliberate for a reconstructed archive; confirm.
    header_format = ("{{\"src\": \"{0}\", "
                     "\"dst\": \"{1}\", "
                     "\"svc_class\": \"{2}\", "
                     "\"dir_meta\": {3}, "
                     "\"file_meta\": {4}, "
                     "\"num_dirs\": {5}, "
                     "\"num_files\": {6}, "
                     "\"uid\": \"{7}\", "
                     "\"gid\": \"{8}\", "
                     "\"timestamp\": \"{9}\"}}")
    print(header_format.format(str(self.dst_url), str(self.src_url),
                               self.svc_class, dir_meta, file_meta,
                               num_dirs, num_files, self.uid, self.gid,
                               time.time()),
          file=self.farchive, end="\n")

    # Rewind to the beginning of the tmp files
    self.fdirs.seek(0)
    self.ffiles.seek(0)

    # Write directories
    for line in self.fdirs:
        print(line, file=self.farchive, end="")

    # Write files
    for line in self.ffiles:
        print(line, file=self.farchive, end="")

    self.farchive.close()
def breadth_first(self):
    """ Traverse the filesystem subtree using breadth-first search and collect
    the directory information and file information into separate temporary
    files which are merged into the archive file at the end.

    Directory entries are written to self.fdirs and file entries to
    self.ffiles. Once the traversal is done the header, the directory entries
    and the file entries are written, in this order, to self.farchive which is
    then closed. The metadata of the root directory is also applied to
    self.dst_url through set_dir_info().

    Raises:
        TapeAccessException: A directory listing failed, an ".archive.init"
            entry was found in the subtree, a checksum query failed (unless
            self.skip_no_xs is set, in which case the file is skipped), or
            the checksum response is malformed or not of type adler32.
    """
    # Dir format: type, rel_path, uid, gid, mode, attr
    dir_meta = "[\"uid\", \"gid\", \"mode\", \"attr\"]"
    dir_format = "[\"d\", \"{0}\", \"{1}\", \"{2}\", \"{3}\", {4}]"
    # File format: type, rel_path, size, mtime, ctime, uid, gid, mode, xstype, xs
    # Fake mtime and ctime subsecond precision
    file_meta = (
        "[\"size\", \"mtime\", \"ctime\", \"uid\", \"gid\", \"mode\", "
        "\"xstype\", \"xs\"]")
    file_format = (
        "[\"f\", \"{0}\", \"{1}\", \"{2}.0\", \"{3}.0\", \"{4}\", "
        "\"{5}\", \"{6}\", \"{7}\", \"{8}\"]")
    # Attrs for 2 replica layout in EOS with current user the only one
    # allowed to trigger archiving operations
    replica_attr = ("{{\"sys.acl\": \"u:{0}:a,z:i\", "
                    "\"sys.forced.blockchecksum\": \"crc32c\", "
                    "\"sys.forced.blocksize\": \"4k\", "
                    "\"sys.forced.checksum\": \"adler\", "
                    "\"sys.forced.layout\": \"replica\", "
                    "\"sys.forced.nstripes\": \"2\", "
                    "\"sys.forced.space\": \"default\"}}").format(self.uid)
    num_files, num_dirs = 0, 0
    fs = client.FileSystem(str(self.src_url))

    # Add root directory which is a bit special and set its metadata
    # Dir mode is 42755 and file mode is 0644. The octal strings are built
    # with the "o" format type rather than oct(), whose output differs
    # between Python 2 ("042755") and Python 3 ("0o42755"); slicing off one
    # character only gives the intended value on Python 2.
    dir_bits = (stat.S_IFDIR | stat.S_ISGID | stat.S_IRWXU | stat.S_IRGRP
                | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    dir_mode = "{0:o}".format(dir_bits)
    file_bits = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    file_mode = "0{0:o}".format(file_bits)

    print(dir_format.format("./", self.uid, self.gid, dir_mode, replica_attr),
          file=self.fdirs)
    dict_attr = ast.literal_eval(replica_attr)
    dict_dinfo = dict(
        zip(["uid", "gid", "mode", "attr"],
            [self.uid, self.gid, dir_mode, dict_attr]))
    set_dir_info(str(self.dst_url), dict_dinfo, list())

    root_path = self.src_url.path
    root_len = len(root_path)
    lst_dirs = [root_path]

    while lst_dirs:
        path = lst_dirs.pop(0)
        st, listing = fs.dirlist(path, DirListFlags.STAT)

        if not st.ok:
            # Report the directory that actually failed, not the root
            msg = "Failed to list dir={0}".format(path)
            raise TapeAccessException(msg)

        for elem in listing:
            if elem.name == ".archive.init":
                msg = ("Trying to reconstruct an already existing archive "
                       "in directory: {0}").format(path)
                raise TapeAccessException(msg)

            if elem.statinfo.flags & StatInfoFlags.IS_DIR:
                num_dirs += 1
                full_path = ''.join([path, elem.name, '/'])
                # Every path is built by appending to root_path, so drop just
                # that prefix; str.replace() would also remove any later
                # occurrence of the root string inside the path.
                rel_path = full_path[root_len:]
                lst_dirs.append(full_path)
                print(dir_format.format(rel_path, self.uid, self.gid,
                                        dir_mode, replica_attr),
                      file=self.fdirs)
            else:
                full_path = ''.join([path, elem.name])
                rel_path = full_path[root_len:]
                st, xs_resp = fs.query(QueryCode.CHECKSUM, full_path)

                if not st.ok:
                    # If requested then skip the files that don't have a checksum
                    if self.skip_no_xs:
                        continue

                    msg = "File={0} failed xs query".format(full_path)
                    raise TapeAccessException(msg)

                num_files += 1
                # Result has an annoying \x00 character at the end and it
                # contains the xs type (adler32) and the xs value
                resp = xs_resp.strip('\x00\0\n ').split()

                # Fail with the module's own exception rather than a bare
                # IndexError when the response lacks the type or the value
                if len(resp) < 2:
                    msg = ("File={0} returned malformed xs response"
                           "").format(full_path)
                    raise TapeAccessException(msg)

                # If checksum value is not 8 char long then we need padding
                if len(resp[1]) != 8:
                    resp[1] = "{0:0>8}".format(resp[1])

                if resp[0] != "adler32":
                    msg = ("Unknown checksum type={0} from tape system"
                           "".format(resp[0]))
                    raise TapeAccessException(msg)

                # modtime is used for both the mtime and the ctime fields
                print(file_format.format(rel_path, elem.statinfo.size,
                                         elem.statinfo.modtime,
                                         elem.statinfo.modtime,
                                         self.uid, self.gid, file_mode,
                                         "adler", resp[1]),
                      file=self.ffiles)

    # Write archive file header
    # NOTE(review): "src" is filled from dst_url and "dst" from src_url -
    # presumably deliberate for a reconstructed archive; confirm.
    header_format = ("{{\"src\": \"{0}\", "
                     "\"dst\": \"{1}\", "
                     "\"svc_class\": \"{2}\", "
                     "\"dir_meta\": {3}, "
                     "\"file_meta\": {4}, "
                     "\"num_dirs\": {5}, "
                     "\"num_files\": {6}, "
                     "\"uid\": \"{7}\", "
                     "\"gid\": \"{8}\", "
                     "\"timestamp\": \"{9}\"}}")
    print(header_format.format(str(self.dst_url), str(self.src_url),
                               self.svc_class, dir_meta, file_meta,
                               num_dirs, num_files, self.uid, self.gid,
                               time.time()),
          file=self.farchive, end="\n")

    # Rewind to the beginning of the tmp files
    self.fdirs.seek(0)
    self.ffiles.seek(0)

    # Write directories
    for line in self.fdirs:
        print(line, file=self.farchive, end="")

    # Write files
    for line in self.ffiles:
        print(line, file=self.farchive, end="")

    self.farchive.close()