def split_source_files(self, dir_abspath: str, split_size: int):
    """Replace the content of a directory by the chunks of a single tar.gz archive.

    The work is done in place, through two temporary subfolders:

    * every entry of the source directory is moved into a first subfolder,
    * this subfolder is archived as tar.gz and split into chunks that are
      written to a second subfolder,
    * the first subfolder is removed,
    * the chunks are moved up into the source directory,
    * the second subfolder is removed.

    Nothing is done when the source directory is empty.

    :param dir_abspath: directory whose content is replaced by the chunks
    :param split_size: size of each chunk, in bytes
    """
    logger.info("Split '%s' into %s-bytes chunks" % (dir_abspath, split_size))
    entries = os.listdir(dir_abspath)
    if len(entries) == 0:
        return

    staging_dir = os.path.join(dir_abspath, str(uuid.uuid4()))
    chunks_dir = os.path.join(dir_abspath, str(uuid.uuid4()))

    # step 1: move the original content out of the way
    ensure_dir(staging_dir, parent=False)
    for entry in entries:
        origin = os.path.join(dir_abspath, entry)
        os.rename(origin, os.path.join(staging_dir, entry))

    # step 2: archive + split (the chunk folder is created by the callee)
    self.archive_and_split_directory(
        self.config, staging_dir, chunks_dir, split_size=split_size
    )

    # step 3: drop the original content and promote the chunks
    chunk_names = os.listdir(chunks_dir)
    shutil.rmtree(staging_dir)
    for chunk_name in chunk_names:
        origin = os.path.join(chunks_dir, chunk_name)
        os.rename(origin, os.path.join(dir_abspath, chunk_name))
    shutil.rmtree(chunks_dir)
def archive_and_split_directory(
    config: Config,
    original_path: str,
    splitted_path: str,
    split_size: int = 100 * 1000 * 1000,
    prefix: str = "content.tar.gz.",
):
    """Archive a directory as tar.gz and split the archive into fixed-size chunks.

    ``tar`` and ``split`` are run as two chained processes (the output of the
    first one is the input of the second one), both with ``splitted_path`` as
    working directory, so the chunks are written to ``splitted_path``.

    Failures are logged, not raised. Both exit codes are checked: a shell
    pipeline only reports the status of its last command, which would hide
    a failure of ``tar``.

    :param config: provides the ``tar`` and ``split`` executables
    :param original_path: directory to archive
    :param splitted_path: directory receiving the chunks (created if required)
    :param split_size: size of each chunk, in bytes
    :param prefix: prefix of the chunk names (``split`` appends its own suffix)
    """
    import tempfile  # local import: only needed to buffer the stderr of tar

    ensure_dir(splitted_path, parent=False)
    tar_cmd = [config.tar, "czf", "-", "-C", original_path, "."]
    split_cmd = [config.split, "-b", str(split_size), "-", prefix]
    # shell-like representation, only used in log messages
    cmd = "%s | %s" % (
        " ".join(shlex.quote(x) for x in tar_cmd),
        " ".join(shlex.quote(x) for x in split_cmd),
    )
    logger.info("Archive and split '%s' to '%s'…" % (original_path, splitted_path))

    failure = None
    stdout, stderr = b"", b""
    # the stderr of tar goes to a temporary file: an unread pipe could fill up
    # and block tar while we are waiting for split
    with tempfile.TemporaryFile() as tar_stderr_fd:
        tar_process = None
        split_process = None
        try:
            tar_process = subprocess.Popen(
                tar_cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=tar_stderr_fd,
                cwd=splitted_path,
            )
        except OSError as exc:
            failure = "unable to run tar (%s)" % exc
        if tar_process is not None:
            try:
                split_process = subprocess.Popen(
                    split_cmd,
                    stdin=tar_process.stdout,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    cwd=splitted_path,
                )
            except OSError as exc:
                failure = "unable to run split (%s)" % exc
                tar_process.kill()
            # only split must hold the read end of the pipe, otherwise tar
            # would never be notified if split exits early
            tar_process.stdout.close()
            if split_process is not None:
                stdout, stderr = split_process.communicate()
            tar_process.wait()
            tar_stderr_fd.seek(0)
            stderr = tar_stderr_fd.read() + stderr
            if split_process is not None and (
                tar_process.returncode or split_process.returncode
            ):
                failure = "tar: %s, split: %s" % (
                    tar_process.returncode,
                    split_process.returncode,
                )
    if failure is not None:
        logger.error("command = %s , return code = %s" % (cmd, failure))
        logger.error(
            "stdout = %s\nstderr = %s"
            % (stdout.decode(errors="replace"), stderr.decode(errors="replace"))
        )
def test_send_empty_dir_no_tar_no_split(self):
    """Send an empty directory, without tar archive and without splitting."""
    with tempfile.TemporaryDirectory() as workspace:
        source_dir = os.path.join(workspace, "original")
        ensure_dir(source_dir, parent=False)
        self.send_directory(
            workspace,
            source_dir,
            use_tar_archives=False,
            split_size=None,
        )
def test_send_empty_file_no_tar_split(self):
    """Send a directory with a single empty file, without tar archive but with splitting."""
    with tempfile.TemporaryDirectory() as workspace:
        source_dir = os.path.join(workspace, "original")
        ensure_dir(source_dir, parent=False)
        empty_file = os.path.join(source_dir, "empty_file.txt")
        # opening in write mode is enough to create the (empty) file
        with open(empty_file, "w"):
            pass
        self.send_directory(
            workspace,
            source_dir,
            use_tar_archives=False,
            split_size=20000,
        )
def prepare_directory_no_tar(self) -> Tuple[int, int]:
    """Prepare the transfer directory to be sent file by file and write its index.

    When ``self.config.split_size`` is set, the content of the directory is
    first replaced by the chunks of a single archive (``split_source_files``).

    The index file (``self.index_abspath``) is then written with:

    * the index magic number,
    * a ``[hairgap]`` section holding the attributes returned by
      ``get_attributes()`` (newlines are removed from the values),
    * an empty ``[splitted_content]`` section when splitting is enabled,
    * a ``[files]`` section with one ``<sha256> = <relative path>`` line per
      regular file found in the transfer directory (sorted walk).

    A file whose first bytes are one of ``HAIRGAP_PREFIXES`` is rewritten in
    place with the escape magic number prepended; its SHA256 and its size are
    those of the original content, measured before the rewrite.

    :return: ``(total_files, total_size)``; the count starts at 1 (presumably
        for the index file itself -- TODO confirm) and the size is the sum of
        the original file sizes plus the size of the index file
    """
    logger.info("Preparing '%s' as multiple files…" % self.transfer_abspath)
    dir_abspath = self.transfer_abspath
    index_path = self.index_abspath
    if self.config.split_size:
        # replace the content by the chunks of a tar.gz archive
        self.split_source_files(dir_abspath, self.config.split_size)
    total_files, total_size = 1, 0
    ensure_dir(index_path)
    with open(index_path, "w") as fd:
        # header of the index file
        fd.write(HAIRGAP_MAGIC_NUMBER_INDEX)
        fd.write("[hairgap]\n")
        for k, v in sorted(self.get_attributes().items()):
            # values are written on a single line
            fd.write("%s = %s\n" % (k, v.replace("\n", "")))
        if self.config.split_size:
            fd.write("[splitted_content]\n")
        fd.write("[files]\n")
        for root, dirnames, filenames in os.walk(dir_abspath):
            # in-place sorts: os.walk visits the subdirectories in this order,
            # so the index lists the files in a deterministic order
            dirnames.sort()
            filenames.sort()
            for filename in filenames:
                file_abspath = os.path.join(root, filename)
                expected_sha256 = hashlib.sha256()
                if not os.path.isfile(file_abspath):
                    # only regular files are listed in the index
                    continue
                filesize = os.path.getsize(file_abspath)
                with open(file_abspath, "rb") as in_fd:
                    # start by checking special contents: read exactly as many
                    # bytes as the index magic number, then hash the whole file
                    prefix = in_fd.read(
                        len(HAIRGAP_MAGIC_NUMBER_INDEX.encode()))
                    expected_sha256.update(prefix)
                    for data in iter(lambda: in_fd.read(65536), b""):
                        expected_sha256.update(data)
                # if the file starts with a special value, we must rewrite it entirely
                # to escape by HAIRGAP_MAGIC_NUMBER_ESCAPE
                # maybe not very efficient, but such files are expected to be small
                if prefix in HAIRGAP_PREFIXES:
                    # the escaped copy is written next to the original, with a
                    # random numeric suffix, then renamed over the original
                    escaped_file_abspath = file_abspath + ".%s" % random.randint(
                        100000, 1000000 - 1)
                    with open(escaped_file_abspath, "wb") as fd_out:
                        fd_out.write(HAIRGAP_MAGIC_NUMBER_ESCAPE.encode())
                        with open(file_abspath, "rb") as fd_in:
                            for data in iter(lambda: fd_in.read(65536), b""):
                                fd_out.write(data)
                    os.rename(escaped_file_abspath, file_abspath)
                # size measured before the escaping: the escape prefix is not counted
                total_size += filesize
                file_relpath = os.path.relpath(file_abspath, dir_abspath)
                fd.write("%s = %s\n" % (expected_sha256.hexdigest(), file_relpath))
                total_files += 1
    total_size += os.path.getsize(index_path)
    logger.info("%s file(s), %s byte(s), prepared in '%s'."
                % (total_files, total_size, self.transfer_abspath))
    return total_files, total_size
def create_files(self, file_count=10, file_size=10000):
    """Fill the transfer directory with numbered text files.

    The files are named ``00000000.txt``, ``00000001.txt``, ... and each one
    holds ``file_size`` times the line ``123456789``.

    :param file_count: number of files to create
    :param file_size: number of lines written to each file
    """
    ensure_dir(self.transfer_abspath, parent=False)
    for index in range(file_count):
        target = os.path.join(self.transfer_abspath, "%08d.txt" % index)
        with open(target, "w") as out_fd:
            out_fd.write("123456789\n" * file_size)
def process_received_file_no_tar(self, tmp_abspath: str, valid: bool = True):
    """Process a single received file when the transfer is sent file by file.

    The first bytes of the file select its handling:

    * escape magic number: the prefix is stripped from the file, which is then
      handled as a regular file,
    * empty magic number: the file is truncated to an empty file, then handled
      as a regular file,
    * index magic number: the index is read, the file is removed and a new
      transfer is started (and immediately completed if no file is expected).

    Any other file is matched with the next entry of ``self.expected_files``:
    its SHA256 is computed and passed to ``transfer_file_received``. When the
    last expected file has been received, the content is unsplit if required
    and the transfer is marked as complete.

    :param tmp_abspath: path of the received file
    :param valid: only used when no file is expected: the file is reported
        with ``transfer_file_unexpected`` if True, and removed otherwise
    """
    empty_prefix = HAIRGAP_MAGIC_NUMBER_EMPTY.encode()
    index_prefix = HAIRGAP_MAGIC_NUMBER_INDEX.encode()
    escape_prefix = HAIRGAP_MAGIC_NUMBER_ESCAPE.encode()
    if os.path.isfile(tmp_abspath):
        with open(tmp_abspath, "rb") as fd:
            # NOTE(review): the length of the "empty" prefix is used for the
            # three comparisons below -- presumably all magic numbers have the
            # same length; confirm against their definitions
            prefix = fd.read(len(empty_prefix))
    else:
        prefix = b""
    if prefix == escape_prefix:  # must be done before the sha256
        # copy everything but the escape prefix to a sibling file, then
        # replace the received file with this copy
        escaped_tmp_abspath = tmp_abspath + ".b"
        with open(escaped_tmp_abspath, "wb") as fd_out:
            with open(tmp_abspath, "rb") as fd_in:
                fd_in.read(len(escape_prefix))
                for data in iter(lambda: fd_in.read(65536), b""):
                    fd_out.write(data)
        os.rename(escaped_tmp_abspath, tmp_abspath)  # no need to use shutil.move
    if prefix == empty_prefix:
        # the file only stands for an empty file: truncate it
        open(tmp_abspath, "w").close()
    if prefix == index_prefix:
        # a new transfer begins with its index
        self.read_index(tmp_abspath)
        os.remove(tmp_abspath)
        self.transfer_start()
        if self.expected_files.empty():  # empty transfer => we mark it as complete
            ensure_dir(self.get_current_transfer_directory(), parent=False)
            self.transfer_complete()
    elif self.expected_files.empty():
        # a file is received but no file is expected
        if valid:
            self.transfer_file_unexpected(tmp_abspath, prefix=prefix)
        elif os.path.isfile(tmp_abspath):
            os.remove(tmp_abspath)
    else:
        # regular file: it is assumed to be the next expected one
        expected_sha256, file_relpath = self.expected_files.get()
        actual_sha256_obj = hashlib.sha256()
        if os.path.isfile(tmp_abspath):
            with open(tmp_abspath, "rb") as in_fd:
                for data in iter(lambda: in_fd.read(65536), b""):
                    actual_sha256_obj.update(data)
        self.transfer_file_received(
            tmp_abspath,
            file_relpath,
            actual_sha256=actual_sha256_obj.hexdigest(),
            expected_sha256=expected_sha256,
        )
        if self.expected_files.empty():
            # all files of the transfer have been received
            if self.current_split_status:
                self.unsplit_received_files(
                    self.config, self.get_current_transfer_directory())
            self.transfer_complete()
def test_send_constants(self):
    """Send files whose content starts with each of the hairgap magic numbers."""
    special_files = (
        ("empty.txt", HAIRGAP_MAGIC_NUMBER_EMPTY),
        ("escape.txt", HAIRGAP_MAGIC_NUMBER_ESCAPE),
        ("index.txt", HAIRGAP_MAGIC_NUMBER_INDEX),
    )
    with tempfile.TemporaryDirectory() as workspace:
        source_dir = os.path.join(workspace, "original")
        ensure_dir(source_dir, parent=False)
        for basename, magic_number in special_files:
            target = os.path.join(source_dir, basename)
            with open(target, "w") as out_fd:
                out_fd.write("%s\n" % magic_number)
        self.send_directory(workspace, source_dir)
def process_received_file_tar(self, tmp_abspath: str, valid: bool = True):
    """Process a received tar.gz archive.

    A single file (the index) and a single directory (the content) are
    expected at the root of the received archive. The index is read first,
    then every regular file located below a root directory is handed to
    ``transfer_file_received``. Members whose path would escape the
    destination directory (absolute path or ``..`` component) are ignored.

    :param tmp_abspath: path of the received archive; removed once processed,
        or directly when ``valid`` is False
    :param valid: False when the reception failed: the file is just removed
    :return: None
    """
    if not valid:
        if os.path.isfile(tmp_abspath):
            os.remove(tmp_abspath)
        return
    with tarfile.open(name=tmp_abspath, mode="r:gz") as tar_fd:
        members = tar_fd.getmembers()
        # the index is the first regular file stored at the root of the archive
        index_member = next(
            (m for m in members if "/" not in m.name and m.isfile()), None
        )
        if index_member is None:
            logger.error("index file not found in %s" % tmp_abspath)
            return
        # /!\ the index file must be read before extracting other files
        with tempfile.NamedTemporaryFile() as dst_fd:
            src_fd = tar_fd.extractfile(index_member)
            try:
                for data in iter(lambda: src_fd.read(8192), b""):
                    dst_fd.write(data)
            finally:
                src_fd.close()
            # read_index reopens the file by name: the content must be on disk
            dst_fd.flush()
            self.read_index(dst_fd.name)
        self.transfer_start()
        count = 0
        for member in members:
            if not member.isfile() or member.issym():
                continue
            root, sep, rel_path = member.name.partition("/")
            if sep != "/":  # the index file => we ignore it
                continue
            # member names come from the received archive: never let them
            # point outside of the transfer directory
            normalized = os.path.normpath(rel_path)
            if (
                os.path.isabs(normalized)
                or normalized == os.pardir
                or normalized.startswith(os.pardir + os.sep)
            ):
                logger.warning(
                    "Ignoring unsafe path %s in %s" % (member.name, tmp_abspath)
                )
                continue
            self.transfer_file_received(
                tmp_abspath,
                rel_path,
                expected_sha256=None,
                actual_sha256=None,
                tmp_fd=tar_fd.extractfile(member),
            )
            count += 1
        if count == 0:
            # empty transfer: the destination directory must exist anyway
            ensure_dir(self.get_current_transfer_directory(), parent=False)
    self.transfer_complete()
    os.remove(tmp_abspath)
def prepare_directory_tar(self) -> Tuple[int, int]:
    """Write the index file of a transfer that is sent as a single tar archive.

    The index holds the magic number followed by a ``[hairgap]`` section with
    the attributes of the transfer (newlines are removed from the values).

    :return: ``(total_files, total_size)``; the size and the files of the
        transfer directory are only counted when
        ``self.config.always_compute_size`` is set, otherwise ``(1, 0)``
    """
    logger.info("Preparing '%s' as a single tar archive…" % self.transfer_abspath)
    ensure_dir(self.index_abspath)
    with open(self.index_abspath, "w") as index_fd:
        index_fd.write(HAIRGAP_MAGIC_NUMBER_INDEX)
        index_fd.write("[hairgap]\n")
        for key, value in sorted(self.get_attributes().items()):
            index_fd.write("%s = %s\n" % (key, value.replace("\n", "")))

    file_count, byte_count = 1, 0
    if self.config.always_compute_size:
        byte_count += os.path.getsize(self.index_abspath)
        for folder, _subfolders, basenames in os.walk(self.transfer_abspath):
            for basename in basenames:
                candidate = os.path.join(folder, basename)
                if not os.path.isfile(candidate):
                    continue
                file_count += 1
                byte_count += os.path.getsize(candidate)
    logger.info(
        "%s file(s), %s byte(s), prepared in '%s'."
        % (file_count, byte_count, self.transfer_abspath)
    )
    return file_count, byte_count
def receive_file(self, tmp_path) -> Optional[bool]:
    """Receive a single file with hairgapr and store it at ``tmp_path``.

    :param tmp_path: path of the file to write (its parent is created if required)
    :return: True if hairgap exited with 0,
        None if it exited with -2 (interrupted by Ctrl-C),
        False for any other return code
    """
    logger.info("Receiving %s via hairgap…" % tmp_path)
    ensure_dir(tmp_path, parent=True)
    with open(tmp_path, "wb") as fd:
        port = str(self.port or self.config.destination_port)
        cmd = [self.config.hairgapr_path, "-p", port]
        if self.config.timeout_s:
            cmd.extend(["-t", str(self.config.timeout_s)])
        if self.config.mem_limit_mb:
            cmd.extend(["-m", str(self.config.mem_limit_mb)])
        cmd.append(self.config.destination_ip)
        # the received content is written by hairgapr directly into the file
        self.hairgap_subprocess = subprocess.Popen(
            cmd, stdout=fd, stderr=subprocess.PIPE
        )
        logger.debug("hairgapr command: %s" % " ".join(cmd))
        __, stderr = self.hairgap_subprocess.communicate()
        fd.flush()

    returncode = self.hairgap_subprocess.returncode
    if returncode == 0:
        self.hairgap_subprocess = None
        logger.info("%s received via hairgap." % tmp_path)
        result = True
    elif returncode == -2:
        # the reference to the subprocess is kept in this case
        logger.info("Exiting hairgap…")
        result = None
    else:
        logger.warning(
            "An error %d was encountered by hairgap: \n%s"
            % (returncode, stderr.decode())
        )
        self.hairgap_subprocess = None
        result = False
    return result
def unsplit_received_files(config: Config, dir_abspath):
    """Rebuild the original content of a directory from its received chunks.

    The chunks found in ``dir_abspath`` are moved to a first temporary
    subfolder, concatenated in sorted order with ``cat`` and extracted with
    ``tar`` into a second temporary subfolder; the extracted content is then
    moved to ``dir_abspath`` and both subfolders are removed. Nothing is done
    when the directory is empty.

    ``cat`` and ``tar`` are run as two chained processes and both exit codes
    are checked: a shell pipeline only reports the status of its last command,
    which would hide a failure of ``cat``. Failures are logged, not raised, and
    the cleanup is performed anyway.

    :param config: provides the ``cat`` and ``tar`` executables
    :param dir_abspath: directory holding the chunks, replaced by their content
    """
    import tempfile  # local import: only needed to buffer the stderr of cat

    names = os.listdir(dir_abspath)
    if not names:
        return
    folder_1 = os.path.join(dir_abspath, str(uuid.uuid4()))
    folder_2 = os.path.join(dir_abspath, str(uuid.uuid4()))
    ensure_dir(folder_1, parent=False)
    ensure_dir(folder_2, parent=False)
    for name in names:
        os.rename(os.path.join(dir_abspath, name), os.path.join(folder_1, name))
    # the chunks must be concatenated in the order of their suffixes
    names.sort()
    cat_cmd = [config.cat] + names
    tar_cmd = [config.tar, "xz", "-C", folder_2]
    # shell-like representation, only used in log messages
    cmd = "%s | %s" % (
        " ".join(shlex.quote(x) for x in cat_cmd),
        " ".join(shlex.quote(x) for x in tar_cmd),
    )

    failure = None
    stdout, stderr = b"", b""
    # the stderr of cat goes to a temporary file: an unread pipe could fill up
    # and block cat while we are waiting for tar
    with tempfile.TemporaryFile() as cat_stderr_fd:
        cat_process = None
        tar_process = None
        try:
            cat_process = subprocess.Popen(
                cat_cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=cat_stderr_fd,
                cwd=folder_1,
            )
        except OSError as exc:
            failure = "unable to run cat (%s)" % exc
        if cat_process is not None:
            try:
                tar_process = subprocess.Popen(
                    tar_cmd,
                    stdin=cat_process.stdout,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    cwd=folder_1,
                )
            except OSError as exc:
                failure = "unable to run tar (%s)" % exc
                cat_process.kill()
            # only tar must hold the read end of the pipe, otherwise cat
            # would never be notified if tar exits early
            cat_process.stdout.close()
            if tar_process is not None:
                stdout, stderr = tar_process.communicate()
            cat_process.wait()
            cat_stderr_fd.seek(0)
            stderr = cat_stderr_fd.read() + stderr
            if tar_process is not None and (
                cat_process.returncode or tar_process.returncode
            ):
                failure = "cat: %s, tar: %s" % (
                    cat_process.returncode,
                    tar_process.returncode,
                )
    if failure is not None:
        logger.error("command = %s , return code = %s" % (cmd, failure))
        logger.error(
            "stdout = %s\nstderr = %s"
            % (stdout.decode(errors="replace"), stderr.decode(errors="replace"))
        )

    # promote the extracted content and drop the temporary folders
    for name in os.listdir(folder_2):
        os.rename(os.path.join(folder_2, name), os.path.join(dir_abspath, name))
    shutil.rmtree(folder_1)
    shutil.rmtree(folder_2)
def send_directory(args):
    """Send a file or a directory, as described by parsed command-line arguments.

    The source is first copied to a ``data`` folder inside a temporary
    directory (created in ``args.tmp_path``), so that the preparation step
    never alters the original files; the copy is then prepared and sent with
    a ``SingleDirSender``. The temporary directory is removed afterwards.

    :param args: parsed arguments; the attributes ``tmp_path``, ``ip``,
        ``port``, ``redundancy``, ``error_chunk_size``, ``max_rate_mbps``,
        ``mtu_b``, ``keepalive_ms``, ``delay_s``, ``bin_path`` and ``source``
        are read
    """
    with tempfile.TemporaryDirectory(dir=args.tmp_path) as dirname:
        config = Config(
            destination_ip=args.ip,
            destination_port=args.port,
            redundancy=args.redundancy,
            error_chunk_size=args.error_chunk_size,
            max_rate_mbps=args.max_rate_mbps,
            mtu_b=args.mtu_b,
            keepalive_ms=args.keepalive_ms,
            end_delay_s=args.delay_s,
            hairgaps=args.bin_path,
        )
        copy_path = os.path.join(dirname, "data")
        index_path = os.path.join(dirname, "index.txt")
        source = args.source
        if os.path.isfile(source):
            # copy_path itself must exist before copying a file into it.
            # parent=False is explicit, as for the other directories created
            # in this file: the default of ensure_dir presumably only creates
            # the parent directory -- confirm against its definition.
            ensure_dir(copy_path, parent=False)
            shutil.copy(source, os.path.join(copy_path, os.path.basename(source)))
        else:
            # copytree creates copy_path itself
            shutil.copytree(source, copy_path)
        sender = SingleDirSender(config, data_path=copy_path, index_path=index_path)
        sender.prepare_directory()
        sender.send_directory()
def transfer_file_received(
    self,
    tmp_abspath,
    file_relpath,
    actual_sha256: Optional[str] = None,
    expected_sha256: Optional[str] = None,
    tmp_fd: Optional[io.BytesIO] = None,
):
    """Called when a file is received.

    The execution time of this method must be small if threading is False
    (5 seconds between two communications).

    The content is stored at ``file_relpath`` inside the current transfer
    directory: copied from ``tmp_fd`` when it is provided, otherwise moved
    from ``tmp_abspath``. When no transfer directory is defined, the content
    is discarded. The transfer counters are updated; the file counts as a
    success when both SHA256 values are equal (which includes the case where
    both are None).

    :param tmp_abspath: the path of the received file
    :param file_relpath: the destination path of the received file
    :param actual_sha256: actual SHA256 (not provided in case of tar archives)
    :param expected_sha256: expected SHA256 (not provided in case of tar archives)
    :param tmp_fd: provided when tmp_abspath is not given; always closed by
        this method
    :return:
    """
    if tmp_fd is not None:
        try:
            receive_path = self.get_current_transfer_directory()
            self.transfer_received_count += 1
            size = 0
            if receive_path:
                file_abspath = os.path.join(receive_path, file_relpath)
                ensure_dir(file_abspath, parent=True)
                with open(file_abspath, "wb") as dst_fd:
                    for data in iter(lambda: tmp_fd.read(8192), b""):
                        dst_fd.write(data)
                        size += len(data)
            else:
                logger.warning("No receive path defined: ignoring %s." % file_relpath)
        finally:
            # close the source even when it is ignored or when the copy fails
            tmp_fd.close()
    elif os.path.isfile(tmp_abspath):
        size = os.path.getsize(tmp_abspath)
        self.transfer_received_count += 1
        receive_path = self.get_current_transfer_directory()
        if receive_path:
            file_abspath = os.path.join(receive_path, file_relpath)
            ensure_dir(file_abspath, parent=True)
            shutil.move(tmp_abspath, file_abspath)
        else:
            logger.warning("No receive path defined: removing %s." % tmp_abspath)
            os.remove(tmp_abspath)
    else:
        # nothing was received: the file is not counted as received, but the
        # checksum comparison below still applies
        size = 0
    self.transfer_received_size += size
    values = {
        "f": file_relpath,
        "as": actual_sha256,
        "es": expected_sha256,
        "s": size,
    }
    if actual_sha256 == expected_sha256:
        logger.info("Received file %(f)s [sha256=%(es)s, size=%(s)s]." % values)
        self.transfer_success_count += 1
    else:
        logger.warning(
            "Received file %(f)s [sha256=%(as)s instead of sha256=%(es)s, size=%(s)s]."
            % values)
        self.transfer_error_count += 1