예제 #1
0
    def split_source_files(self, dir_abspath: str, split_size: int):
        """Replace the content of a folder by the chunks of one tar.gz archive.

        Steps:

        * every entry of ``dir_abspath`` is moved into a first, uniquely-named
          subfolder;
        * that subfolder is archived and split into chunks that are written
          into a second, uniquely-named subfolder;
        * the first subfolder is removed;
        * the chunks are moved up into ``dir_abspath`` and the second subfolder
          is removed.

        Nothing is done when ``dir_abspath`` is empty.

        :param dir_abspath: folder whose content is replaced by the chunks
        :param split_size: size (in bytes) of each chunk
        """
        logger.info("Split '%s' into %s-bytes chunks" %
                    (dir_abspath, split_size))
        entries = os.listdir(dir_abspath)
        if not entries:
            return

        def move_all(entry_names, src_dir, dst_dir):
            # plain renames: both folders live under dir_abspath
            for entry in entry_names:
                os.rename(os.path.join(src_dir, entry),
                          os.path.join(dst_dir, entry))

        staging_dir = os.path.join(dir_abspath, str(uuid.uuid4()))
        chunks_dir = os.path.join(dir_abspath, str(uuid.uuid4()))
        ensure_dir(staging_dir, parent=False)
        move_all(entries, dir_abspath, staging_dir)
        self.archive_and_split_directory(
            self.config,
            staging_dir,
            chunks_dir,
            split_size=split_size,
        )
        # the chunks are listed before the original content is dropped
        chunk_names = os.listdir(chunks_dir)
        shutil.rmtree(staging_dir)
        move_all(chunk_names, chunks_dir, dir_abspath)
        shutil.rmtree(chunks_dir)
예제 #2
0
 def archive_and_split_directory(
     config: Config,
     original_path: str,
     splitted_path: str,
     split_size: int = 100 * 1000 * 1000,
     prefix: str = "content.tar.gz.",
 ):
     """Archive a directory as tar.gz and split the stream into chunks.

     Equivalent to ``tar czf - -C original_path . | split -b split_size - prefix``
     executed in ``splitted_path`` (which is created when required).

     The two commands are started as separate processes connected by a pipe,
     without any shell: a shell pipeline only reports the exit status of its
     last command, so a failing ``tar`` would go unnoticed.

     Failures are only logged (no exception is raised for them).

     :param config: provides the paths of the ``tar`` and ``split`` binaries
     :param original_path: directory to archive
     :param splitted_path: directory receiving the chunks
     :param split_size: size (in bytes) of each chunk
     :param prefix: prefix of the chunk filenames (``split`` appends a suffix)
     """
     import tempfile  # local import: only needed to capture tar's stderr

     ensure_dir(splitted_path, parent=False)
     tar_cmd = [config.tar, "czf", "-", "-C", original_path, "."]
     split_cmd = [config.split, "-b", str(split_size), "-", prefix]
     # shell-like rendering of the pipeline, only used in log messages
     cmd = "%s | %s" % (
         " ".join(shlex.quote(x) for x in tar_cmd),
         " ".join(shlex.quote(x) for x in split_cmd),
     )
     logger.info("Archive and split '%s' to '%s'…" %
                 (original_path, splitted_path))

     def run_pipeline():
         """run tar | split, return (tar code, split code, stdout, stderr)"""
         # tar's stderr goes to a file: nobody drains it while we wait for
         # split, so a pipe could fill up and block tar forever
         with tempfile.TemporaryFile() as tar_stderr_fd:
             tar_p = subprocess.Popen(
                 tar_cmd,
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.PIPE,
                 stderr=tar_stderr_fd,
                 cwd=splitted_path,
             )
             try:
                 split_p = subprocess.Popen(
                     split_cmd,
                     stdin=tar_p.stdout,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     cwd=splitted_path,
                 )
             except OSError:
                 tar_p.kill()
                 tar_p.wait()
                 raise
             finally:
                 # only split must hold the read end of the pipe, so that
                 # tar is notified if split exits early
                 tar_p.stdout.close()
             out, err = split_p.communicate()
             tar_p.wait()
             tar_stderr_fd.seek(0)
             return (tar_p.returncode, split_p.returncode, out,
                     tar_stderr_fd.read() + err)

     try:
         tar_code, split_code, stdout, stderr = run_pipeline()
     except OSError as exc:
         # e.g. missing binary: reported like any other failure
         logger.error("command = %s , unable to run: %s" % (cmd, exc))
         return
     if tar_code or split_code:
         logger.error("command = %s , return code = %s" %
                      (cmd, "%s | %s" % (tar_code, split_code)))
         logger.error("stdout = %s\nstderr = %s" %
                      (stdout.decode(), stderr.decode()))
예제 #3
0
 def test_send_empty_dir_no_tar_no_split(self):
     """Send an empty directory, without tar archive and without split."""
     with tempfile.TemporaryDirectory() as workspace:
         source_dir = os.path.join(workspace, "original")
         ensure_dir(source_dir, parent=False)
         self.send_directory(
             workspace,
             source_dir,
             use_tar_archives=False,
             split_size=None,
         )
예제 #4
0
 def test_send_empty_file_no_tar_split(self):
     """Send a directory holding one empty file, split but not tar-archived."""
     with tempfile.TemporaryDirectory() as workspace:
         source_dir = os.path.join(workspace, "original")
         ensure_dir(source_dir, parent=False)
         empty_file = os.path.join(source_dir, "empty_file.txt")
         # create the file without writing anything to it
         with open(empty_file, "w"):
             pass
         self.send_directory(
             workspace,
             source_dir,
             use_tar_archives=False,
             split_size=20000,
         )
예제 #5
0
    def prepare_directory_no_tar(self) -> Tuple[int, int]:
        """Write the index file describing the files to send one by one.

        When ``self.config.split_size`` is set, the content of the transfer
        directory is first replaced by archive chunks (``split_source_files``).

        The index starts with ``HAIRGAP_MAGIC_NUMBER_INDEX``, followed by a
        ``[hairgap]`` section with the attributes, a ``[splitted_content]``
        header when splitting is enabled, and a ``[files]`` section made of
        ``<sha256> = <relative path>`` lines (names sorted in each directory).

        A file whose first bytes are in ``HAIRGAP_PREFIXES`` is rewritten in
        place with ``HAIRGAP_MAGIC_NUMBER_ESCAPE`` prepended; its SHA256 and
        its size are those of the content before escaping.

        :return: ``(file count, size in bytes)``; the count starts at 1 and
            the size of the index file is added to the total
        """
        logger.info("Preparing '%s' as multiple files…" %
                    self.transfer_abspath)
        source_dir = self.transfer_abspath
        index_file = self.index_abspath
        if self.config.split_size:
            self.split_source_files(source_dir, self.config.split_size)

        def digest_and_prefix(path, prefix_length):
            """return the SHA256 hex digest and the first bytes of a file"""
            digest = hashlib.sha256()
            with open(path, "rb") as src:
                # the first bytes are kept to detect special contents
                head = src.read(prefix_length)
                digest.update(head)
                while True:
                    block = src.read(65536)
                    if not block:
                        break
                    digest.update(block)
            return digest.hexdigest(), head

        def escape_in_place(path):
            """rewrite a file with HAIRGAP_MAGIC_NUMBER_ESCAPE prepended"""
            # maybe not very efficient, but such files are expected to be small
            escaped_path = path + ".%s" % random.randint(100000, 1000000 - 1)
            with open(escaped_path, "wb") as dst:
                dst.write(HAIRGAP_MAGIC_NUMBER_ESCAPE.encode())
                with open(path, "rb") as src:
                    while True:
                        block = src.read(65536)
                        if not block:
                            break
                        dst.write(block)
            os.rename(escaped_path, path)

        file_count = 1
        byte_count = 0
        ensure_dir(index_file)
        with open(index_file, "w") as index_fd:
            index_fd.write(HAIRGAP_MAGIC_NUMBER_INDEX)
            index_fd.write("[hairgap]\n")
            for key, value in sorted(self.get_attributes().items()):
                index_fd.write("%s = %s\n" % (key, value.replace("\n", "")))
            if self.config.split_size:
                index_fd.write("[splitted_content]\n")
            index_fd.write("[files]\n")
            for root, dirnames, filenames in os.walk(source_dir):
                # sorting in place also fixes the order of the walk itself
                dirnames.sort()
                filenames.sort()
                for filename in filenames:
                    file_abspath = os.path.join(root, filename)
                    if not os.path.isfile(file_abspath):
                        continue
                    filesize = os.path.getsize(file_abspath)
                    sha256_hex, prefix = digest_and_prefix(
                        file_abspath,
                        len(HAIRGAP_MAGIC_NUMBER_INDEX.encode()),
                    )
                    if prefix in HAIRGAP_PREFIXES:
                        escape_in_place(file_abspath)

                    byte_count += filesize
                    index_fd.write(
                        "%s = %s\n" %
                        (sha256_hex, os.path.relpath(file_abspath, source_dir)))
                    file_count += 1
        byte_count += os.path.getsize(index_file)
        logger.info("%s file(s), %s byte(s), prepared in '%s'." %
                    (file_count, byte_count, self.transfer_abspath))
        return file_count, byte_count
예제 #6
0
 def create_files(
     self,
     file_count=10,
     file_size=10000,
 ):
     """Fill the transfer directory with numbered text files.

     The files are named ``00000000.txt``, ``00000001.txt``… and each one
     holds ``file_size`` times the line ``123456789``.

     :param file_count: number of files to create
     :param file_size: number of lines written to each file
     """
     ensure_dir(self.transfer_abspath, parent=False)
     for index in range(file_count):
         path = os.path.join(self.transfer_abspath, "%08d.txt" % index)
         with open(path, "w") as fd:
             fd.write("123456789\n" * file_size)
예제 #7
0
 def process_received_file_no_tar(self,
                                  tmp_abspath: str,
                                  valid: bool = True):
     """Process one file received in "no tar" mode.

     The first bytes of the file select the processing:

     * escape marker: the marker is stripped from the file, which is then
       handled as a regular file;
     * empty marker: the file is truncated, then handled as a regular file;
     * index marker: the index is read, the file is removed and a transfer
       is started (and immediately completed when no file is expected).

     Any other file is matched against the next expected file; when none is
     expected, it is reported as unexpected (if valid) or removed.

     :param tmp_abspath: path of the received file
     :param valid: False when the reception failed
     """
     magic_empty = HAIRGAP_MAGIC_NUMBER_EMPTY.encode()
     magic_index = HAIRGAP_MAGIC_NUMBER_INDEX.encode()
     magic_escape = HAIRGAP_MAGIC_NUMBER_ESCAPE.encode()

     prefix = b""
     if os.path.isfile(tmp_abspath):
         with open(tmp_abspath, "rb") as fd:
             prefix = fd.read(len(magic_empty))

     if prefix == magic_escape:  # must be done before the sha256
         unescaped_abspath = tmp_abspath + ".b"
         with open(unescaped_abspath, "wb") as fd_out, \
                 open(tmp_abspath, "rb") as fd_in:
             # skip the marker, then copy everything else
             fd_in.read(len(magic_escape))
             while True:
                 chunk = fd_in.read(65536)
                 if not chunk:
                     break
                 fd_out.write(chunk)
         # no need to use shutil.move
         os.rename(unescaped_abspath, tmp_abspath)
     if prefix == magic_empty:
         with open(tmp_abspath, "w"):
             pass

     if prefix == magic_index:
         self.read_index(tmp_abspath)
         os.remove(tmp_abspath)
         self.transfer_start()
         if self.expected_files.empty():
             # empty transfer => we mark it as complete
             ensure_dir(self.get_current_transfer_directory(), parent=False)
             self.transfer_complete()
         return

     if self.expected_files.empty():
         if valid:
             self.transfer_file_unexpected(tmp_abspath, prefix=prefix)
         elif os.path.isfile(tmp_abspath):
             os.remove(tmp_abspath)
         return

     expected_sha256, file_relpath = self.expected_files.get()
     hasher = hashlib.sha256()
     if os.path.isfile(tmp_abspath):
         with open(tmp_abspath, "rb") as in_fd:
             while True:
                 chunk = in_fd.read(65536)
                 if not chunk:
                     break
                 hasher.update(chunk)
     self.transfer_file_received(
         tmp_abspath,
         file_relpath,
         actual_sha256=hasher.hexdigest(),
         expected_sha256=expected_sha256,
     )
     if not self.expected_files.empty():
         return
     # all files of the transfer have been received
     if self.current_split_status:
         self.unsplit_received_files(self.config,
                                     self.get_current_transfer_directory())
     self.transfer_complete()
예제 #8
0
 def test_send_constants(self):
     """Send files whose content starts with each of the magic numbers."""
     samples = [
         ("empty.txt", HAIRGAP_MAGIC_NUMBER_EMPTY),
         ("escape.txt", HAIRGAP_MAGIC_NUMBER_ESCAPE),
         ("index.txt", HAIRGAP_MAGIC_NUMBER_INDEX),
     ]
     with tempfile.TemporaryDirectory() as workspace:
         source_dir = os.path.join(workspace, "original")
         ensure_dir(source_dir, parent=False)
         for filename, magic in samples:
             sample_path = os.path.join(source_dir, filename)
             with open(sample_path, "w") as fd:
                 fd.write("%s\n" % magic)
         self.send_directory(workspace, source_dir)
예제 #9
0
    def process_received_file_tar(self, tmp_abspath: str, valid: bool = True):
        """Process a received tar.gz archive.

        A single file (the index) and a single directory (the data) are
        expected at the root of the received archive.
        The index is read first, then every regular file located under a
        root directory is passed to ``transfer_file_received``.
        The archive is removed once the transfer is complete.

        :param tmp_abspath: path of the received archive
        :param valid: False when the reception failed; the archive is then
            removed (if it exists) and nothing else is done

        :return: None
        """
        if not valid:
            if os.path.isfile(tmp_abspath):
                os.remove(tmp_abspath)
            return
        with tarfile.open(name=tmp_abspath, mode="r:gz") as tar_fd:
            # the index is the first regular file at the root of the archive
            index_member = None
            for member in tar_fd.getmembers():  # type: tarfile.TarInfo
                if "/" not in member.name and member.isfile():
                    index_member = member
                    break
            if index_member is None:
                # NOTE(review): the archive is left on disk in this case
                logger.error("index file not found in %s" % tmp_abspath)
                return
            # /!\ the index file must be read before extracting other files
            with tempfile.NamedTemporaryFile() as dst_fd:
                with tar_fd.extractfile(index_member) as src_fd:
                    for data in iter(lambda: src_fd.read(8192), b""):
                        dst_fd.write(data)
                # read_index() reopens the file by name: flush it first
                dst_fd.flush()
                self.read_index(dst_fd.name)
            self.transfer_start()
            count = 0
            for member in tar_fd.getmembers():  # type: tarfile.TarInfo
                if not member.isfile() or member.issym():
                    continue
                root, sep, rel_path = member.name.partition("/")
                if sep != "/":  # the index file => we ignore it
                    continue
                # NOTE(review): rel_path comes from the archive and is joined
                # to the receive directory by transfer_file_received(); confirm
                # that names containing ".." cannot escape that directory.
                self.transfer_file_received(
                    tmp_abspath,
                    rel_path,
                    expected_sha256=None,
                    actual_sha256=None,
                    tmp_fd=tar_fd.extractfile(member),
                )
                count += 1
            if count == 0:
                # empty transfer: the destination directory must still exist
                ensure_dir(self.get_current_transfer_directory(), parent=False)
            self.transfer_complete()
        os.remove(tmp_abspath)
예제 #10
0
 def prepare_directory_tar(self) -> Tuple[int, int]:
     """Write the index file used when sending a single tar archive.

     The index only holds ``HAIRGAP_MAGIC_NUMBER_INDEX`` and a ``[hairgap]``
     section with the attributes.
     Files and sizes are only counted when
     ``self.config.always_compute_size`` is set; otherwise ``(1, 0)`` is
     returned.

     :return: ``(file count, size in bytes)``
     """
     logger.info("Preparing '%s' as a single tar archive…" %
                 self.transfer_abspath)
     ensure_dir(self.index_abspath)
     with open(self.index_abspath, "w") as index_fd:
         index_fd.write(HAIRGAP_MAGIC_NUMBER_INDEX)
         index_fd.write("[hairgap]\n")
         for key, value in sorted(self.get_attributes().items()):
             index_fd.write("%s = %s\n" % (key, value.replace("\n", "")))
     byte_count = 0
     file_count = 1
     if self.config.always_compute_size:
         byte_count += os.path.getsize(self.index_abspath)
         for root, _dirnames, filenames in os.walk(self.transfer_abspath):
             for filename in filenames:
                 candidate = os.path.join(root, filename)
                 if not os.path.isfile(candidate):
                     continue
                 file_count += 1
                 byte_count += os.path.getsize(candidate)
     logger.info("%s file(s), %s byte(s), prepared in '%s'." %
                 (file_count, byte_count, self.transfer_abspath))
     return file_count, byte_count
예제 #11
0
 def receive_file(self, tmp_path) -> Optional[bool]:
     """Receive a single file with hairgapr and write it to ``tmp_path``.

     :param tmp_path: destination of the received data
     :return: True when hairgapr exited with 0,
         None when its return code is -2 (terminated by Ctrl-C),
         False for any other return code
     """
     logger.info("Receiving %s via hairgap…" % tmp_path)
     ensure_dir(tmp_path, parent=True)
     with open(tmp_path, "wb") as output_fd:
         command = [
             self.config.hairgapr_path,
             "-p",
             str(self.port or self.config.destination_port),
         ]
         # optional limits are only passed when configured
         if self.config.timeout_s:
             command.extend(["-t", str(self.config.timeout_s)])
         if self.config.mem_limit_mb:
             command.extend(["-m", str(self.config.mem_limit_mb)])
         command.append(self.config.destination_ip)
         self.hairgap_subprocess = subprocess.Popen(
             command,
             stdout=output_fd,
             stderr=subprocess.PIPE,
         )
         logger.debug("hairgapr command: %s" % " ".join(command))
         _ignored, stderr = self.hairgap_subprocess.communicate()
         output_fd.flush()
     returncode = self.hairgap_subprocess.returncode
     if returncode == 0:
         self.hairgap_subprocess = None
         logger.info("%s received via hairgap." % tmp_path)
         return True
     if returncode == -2:
         # the subprocess attribute is left untouched in this case
         logger.info("Exiting hairgap…")
         return None
     logger.warning("An error %d was encountered by hairgap: \n%s" %
                    (returncode, stderr.decode()))
     self.hairgap_subprocess = None
     return False
예제 #12
0
 def unsplit_received_files(config: Config, dir_abspath):
     """Rebuild the original files from the received archive chunks.

     Every entry of ``dir_abspath`` is moved into a first temporary
     subfolder; the entries are concatenated in name order and the resulting
     tar.gz stream is extracted into a second temporary subfolder, whose
     content is finally moved to ``dir_abspath``. Both subfolders are removed.

     ``cat`` and ``tar`` are started as separate processes connected by a
     pipe, without any shell: a shell pipeline only reports the exit status
     of its last command, so a failing ``cat`` would go unnoticed.

     Failures are only logged; whatever was extracted is still moved and the
     chunks are still removed.

     :param config: provides the paths of the ``cat`` and ``tar`` binaries
     :param dir_abspath: directory holding the received chunks
     """
     import tempfile  # local import: only needed to capture cat's stderr

     names = os.listdir(dir_abspath)
     if not names:
         return
     folder_1 = os.path.join(dir_abspath, str(uuid.uuid4()))
     folder_2 = os.path.join(dir_abspath, str(uuid.uuid4()))
     ensure_dir(folder_1, parent=False)
     ensure_dir(folder_2, parent=False)
     for name in names:
         os.rename(os.path.join(dir_abspath, name),
                   os.path.join(folder_1, name))
     # the chunks must be concatenated in name order
     names.sort()
     cat_cmd = [config.cat] + names
     # "f -": read the archive from stdin explicitly instead of relying on
     # the default archive of the tar implementation
     tar_cmd = [config.tar, "xzf", "-", "-C", folder_2]
     # shell-like rendering of the pipeline, only used in log messages
     cmd = "%s | %s" % (
         " ".join(shlex.quote(x) for x in cat_cmd),
         " ".join(shlex.quote(x) for x in tar_cmd),
     )

     def run_pipeline():
         """run cat | tar, return (cat code, tar code, stdout, stderr)"""
         # cat's stderr goes to a file: nobody drains it while we wait for
         # tar, so a pipe could fill up and block cat forever
         with tempfile.TemporaryFile() as cat_stderr_fd:
             cat_p = subprocess.Popen(
                 cat_cmd,
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.PIPE,
                 stderr=cat_stderr_fd,
                 cwd=folder_1,
             )
             try:
                 tar_p = subprocess.Popen(
                     tar_cmd,
                     stdin=cat_p.stdout,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     cwd=folder_1,
                 )
             except OSError:
                 cat_p.kill()
                 cat_p.wait()
                 raise
             finally:
                 # only tar must hold the read end of the pipe, so that
                 # cat is notified if tar exits early
                 cat_p.stdout.close()
             out, err = tar_p.communicate()
             cat_p.wait()
             cat_stderr_fd.seek(0)
             return (cat_p.returncode, tar_p.returncode, out,
                     cat_stderr_fd.read() + err)

     try:
         cat_code, tar_code, stdout, stderr = run_pipeline()
     except OSError as exc:
         # e.g. missing binary: reported like any other failure
         logger.error("command = %s , unable to run: %s" % (cmd, exc))
     else:
         if cat_code or tar_code:
             logger.error("command = %s , return code = %s" %
                          (cmd, "%s | %s" % (cat_code, tar_code)))
             logger.error("stdout = %s\nstderr = %s" %
                          (stdout.decode(), stderr.decode()))
     names = os.listdir(folder_2)
     for name in names:
         os.rename(os.path.join(folder_2, name),
                   os.path.join(dir_abspath, name))
     shutil.rmtree(folder_1)
     shutil.rmtree(folder_2)
예제 #13
0
def send_directory(args):
    """Copy a file or a directory to a temporary location and send it.

    The source is copied to a ``data`` folder inside a temporary directory
    created under ``args.tmp_path``; the index is written next to it.
    A single source file is copied inside the ``data`` folder, under its own
    name.

    :param args: object providing the attributes ``tmp_path``, ``source``,
        ``ip``, ``port``, ``redundancy``, ``error_chunk_size``,
        ``max_rate_mbps``, ``mtu_b``, ``keepalive_ms``, ``delay_s`` and
        ``bin_path``
    """
    with tempfile.TemporaryDirectory(dir=args.tmp_path) as dirname:
        config = Config(
            destination_ip=args.ip,
            destination_port=args.port,
            redundancy=args.redundancy,
            error_chunk_size=args.error_chunk_size,
            max_rate_mbps=args.max_rate_mbps,
            mtu_b=args.mtu_b,
            keepalive_ms=args.keepalive_ms,
            end_delay_s=args.delay_s,
            hairgaps=args.bin_path,
        )
        copy_path = os.path.join(dirname, "data")
        index_path = os.path.join(dirname, "index.txt")
        source = args.source
        if os.path.isfile(source):
            # copy_path itself must exist before copying the file into it;
            # parent=False is explicit because the default mode of
            # ensure_dir() appears to create only the parent directory
            # (it is called on file paths elsewhere) — TODO confirm
            ensure_dir(copy_path, parent=False)
            shutil.copy(source, os.path.join(copy_path, os.path.basename(source)))
        else:
            # copytree() creates copy_path itself
            shutil.copytree(source, copy_path)
        sender = SingleDirSender(config, data_path=copy_path, index_path=index_path)
        sender.prepare_directory()
        sender.send_directory()
예제 #14
0
    def transfer_file_received(
        self,
        tmp_abspath,
        file_relpath,
        actual_sha256: Optional[str] = None,
        expected_sha256: Optional[str] = None,
        tmp_fd: Optional[io.BytesIO] = None,
    ):
        """called when a file is received

        the execution time of this method must be small if threading is False (5 seconds between two communications)

        The content is stored as ``file_relpath`` under the current transfer
        directory, either copied from ``tmp_fd`` or moved from ``tmp_abspath``.
        When no transfer directory is defined, the content is dropped.
        The received/success/error counters are updated and the result is
        logged; a missing ``tmp_abspath`` counts as a 0-byte file.

        :param tmp_abspath: the path of the received file
        :param file_relpath: the destination path of the received file
        :param actual_sha256: actual SHA256 (not provided in case of tar archives)
        :param expected_sha256: expected SHA256 (not provided in case of tar archives)
        :param tmp_fd: provided when tmp_abspath is not given; it is always
            closed by this method
        :return:
        """
        # NOTE(review): file_relpath is joined to the receive directory as
        # is; confirm that callers cannot pass a path containing "..".
        size = 0
        if tmp_fd is not None:
            try:
                receive_path = self.get_current_transfer_directory()
                self.transfer_received_count += 1
                if receive_path:
                    file_abspath = os.path.join(receive_path, file_relpath)
                    ensure_dir(file_abspath, parent=True)
                    with open(file_abspath, "wb") as dst_fd:
                        for data in iter(lambda: tmp_fd.read(8192), b""):
                            dst_fd.write(data)
                            size += len(data)
                else:
                    logger.warning("No receive path defined: ignoring %s." %
                                   file_relpath)
            finally:
                # closed in every case: no receive path, or failed copy
                tmp_fd.close()
        elif os.path.isfile(tmp_abspath):
            size = os.path.getsize(tmp_abspath)
            self.transfer_received_count += 1
            receive_path = self.get_current_transfer_directory()
            if receive_path:
                file_abspath = os.path.join(receive_path, file_relpath)
                ensure_dir(file_abspath, parent=True)
                shutil.move(tmp_abspath, file_abspath)
            else:
                logger.warning("No receive path defined: removing %s." %
                               tmp_abspath)
                os.remove(tmp_abspath)
        self.transfer_received_size += size
        values = {
            "f": file_relpath,
            "as": actual_sha256,
            "es": expected_sha256,
            "s": size,
        }
        # both digests are None for tar archives: counted as a success
        if actual_sha256 == expected_sha256:
            logger.info("Received file %(f)s [sha256=%(es)s, size=%(s)s]." %
                        values)
            self.transfer_success_count += 1
        else:
            logger.warning(
                "Received file %(f)s [sha256=%(as)s instead of sha256=%(es)s, size=%(s)s]."
                % values)
            self.transfer_error_count += 1