Example #1
    def __init__(
        self,
        pe_path_or_bytes: Union[str, os.PathLike, BinaryIO],
        output_dir: os.PathLike = None,
        **kwargs,
    ) -> None:
        # Normalize the input: a string is promoted to a Path; a Path is
        # checked for readability and read from disk; an already-open binary
        # stream is read directly.
        if isinstance(pe_path_or_bytes, str):
            pe_path_or_bytes: pathlib.Path = pathlib.Path(pe_path_or_bytes)
        if isinstance(pe_path_or_bytes, pathlib.Path):
            utils.check_read_access(pe_path_or_bytes)
            self.file_path = pe_path_or_bytes
            with self.file_path.open("rb") as input_file:
                self.file_contents = input_file.read()
        if isinstance(pe_path_or_bytes, io.BufferedIOBase):
            self.file_contents = pe_path_or_bytes.read()

        try:
            self.pe = pefile.PE(data=self.file_contents)
        except pefile.PEFormatError as e:
            # Surface malformed PE data as a TypeError so callers can treat
            # "not this artifact type" uniformly.
            raise TypeError(e) from e

        if output_dir:
            self.output_dir = output_dir
        else:
            if hasattr(self, "file_path"):
                self.output_dir = self.file_path.parent / utils.slugify(
                    self.file_path.name + "_output")
            else:
                self.output_dir = pathlib.Path.cwd()
        utils.check_write_access(self.output_dir)
        self.kwargs = kwargs
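The same normalization pattern appears in all of these constructors: a string becomes a pathlib.Path, a Path is read from disk, and an open binary stream is read directly. A minimal usage sketch, assuming the class is named PortableExecutable and lives under pydecipher.artifact_types (the snippet shows only the __init__ body, so both names are assumptions):

import pathlib

from pydecipher.artifact_types.pe import PortableExecutable  # module path assumed

# From a path on disk: file_contents is read from the file, and output_dir
# defaults to a "<name>_output" directory next to the input file.
pe_artifact = PortableExecutable("samples/packed.exe")

# From an open binary stream: no file_path attribute is set, so output_dir
# defaults to the current working directory unless one is passed explicitly.
with open("samples/packed.exe", "rb") as stream:
    pe_artifact = PortableExecutable(stream, output_dir=pathlib.Path("out"))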
Example #2
    def __init__(
        self,
        zip_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
        output_dir: pathlib.Path = None,
        **kwargs,
    ) -> None:
        """Construct a zip file artifact.

        Parameters
        ----------
        zip_path_or_bytes : Union[str, pathlib.Path, BinaryIO]
            The path to the zip file, or a bytes-like object of a zip file in memory.
        output_dir : pathlib.Path, optional
            Where any output extracted from this artifact should get dumped.
        **kwargs
            Any keyword arguments needed for the parsing of this artifact, or for
            parsing nested artifacts.

        Raises
        ------
        TypeError
            Will raise a TypeError if the zip_path_or_bytes item is not a zip archive.
        """
        if isinstance(zip_path_or_bytes, str):
            zip_path_or_bytes: Path = Path(zip_path_or_bytes)
        if isinstance(zip_path_or_bytes, Path):
            utils.check_read_access(zip_path_or_bytes)
            self.archive_path = zip_path_or_bytes
            input_file: BinaryIO
            with self.archive_path.open("rb") as input_file:
                self.archive_contents = input_file.read()
        if isinstance(zip_path_or_bytes, io.BufferedIOBase):
            self.archive_contents = zip_path_or_bytes.read()

        if output_dir:
            self.output_dir = output_dir
        else:
            if hasattr(self, "archive_path"):
                self.output_dir = self.archive_path.parent / utils.slugify(
                    self.archive_path.name + "_output")
            else:
                self.output_dir = Path.cwd()

        self.kwargs = kwargs
        utils.check_write_access(self.output_dir.parent)
        if not self.validate_zip_archive():
            raise TypeError("[!] This is not a zip archive.")
Example #3
    def __init__(
        self,
        file_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
        output_dir: pathlib.Path = None,
        **kwargs,
    ) -> None:
        if isinstance(file_path_or_bytes, str):
            file_path_or_bytes: pathlib.Path = pathlib.Path(file_path_or_bytes)
        if isinstance(file_path_or_bytes, pathlib.Path):
            utils.check_read_access(file_path_or_bytes)
            self.file_path = file_path_or_bytes
            input_file: BinaryIO
            with self.file_path.open("rb") as input_file:
                self.file_contents = input_file.read()
        if isinstance(file_path_or_bytes, io.BufferedIOBase):
            self.file_contents = file_path_or_bytes.read()

        if output_dir:
            self.output_dir = output_dir
        else:
            if hasattr(self, "file_path"):
                self.output_dir = self.file_path.parent / utils.slugify(
                    self.file_path.name + "_output")
            else:
                self.output_dir = pathlib.Path.cwd()
        utils.check_write_access(self.output_dir)

        if not self.validate_pyc_file():
            raise TypeError("[!] This is not a compiled Python file.")
        self.version_hint = kwargs.get("version_hint", None)
        if self.version_hint:
            try:
                self.magic_num = bytecode.version_str_to_magic_num_int(
                    self.version_hint)
            except Exception as exc:
                raise RuntimeError(
                    f"Failed to produce magic number from version hint {self.version_hint}. Please try a different"
                    " version.") from exc
Example #4
def run(_args: List[str] = None) -> None:
    """Orchestrate the flow of the remap command.

    This is the entry-point of the remap command. It calls out to other routines
    and attempts to follow this high-level flow:

        1.  Check that the program is running in a sufficiently new Python
            environment, and parse any arguments.
        2.  Determine what type of input was passed to the program, which will
            ultimately decide what method remap uses to recover the opmap.
        3.  Attempt one of the opmap recovery methods (see the documentation
            for more on these methods).
        4.  If the opmap was successfully recovered, validate it, then write
            it to a file.

    Parameters
    ----------
    _args : List[str]
        If this function is being called from other Python code, remap
        flags and other command-line options can be passed in as a list.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(_args)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    remapped_bytecode_path: pathlib.Path = pathlib.Path(
        args.remapped_bytecode_path).resolve()

    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
    else:
        output_dir: pathlib.Path = pathlib.Path.cwd()
    output_dir = output_dir / f"remap_output_{utils.slugify(remapped_bytecode_path.name)}"

    # The following block sets up logging to a StringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file because
    # we don't want to leave a log file on disk unless the program succeeds.
    log_stream: io.StringIO = io.StringIO()
    log_stream__handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream__handler.setFormatter(pydecipher.log_format)
    log_stream__handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream__handler)

    remappings: Dict[int, Dict[int, int]] = {}
    version: str = ""
    remapping_method: str = ""
    cli: str = " ".join(sys.argv) if not _args else " ".join(_args)
    if args.version:
        version = args.version
    if args.megafile:
        # Determine whether the megafile argument is a path or a version string.
        standard_bytecode_path: pathlib.Path = None
        if pathlib.Path(args.megafile).exists():
            standard_bytecode_path = pathlib.Path(args.megafile)
        else:
            potential_version: str = args.megafile
            magic_num: int = bytecode.version_str_to_magic_num_int(
                potential_version)
            if magic_num:
                # Find the bundled reference megafile whose magic number
                # matches the requested version.
                compiled_file: str
                for compiled_file in os.listdir(
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled"):
                    full_path_obj: pathlib.Path = (
                        pathlib.Path(__file__).parent / "reference_files" /
                        "compiled" / compiled_file)
                    infile: BinaryIO
                    with full_path_obj.open("rb") as infile:
                        if xdis.magics.magic2int(infile.read(4)) == magic_num:
                            logger.info(
                                f"[*] Found matching megafile for version {potential_version}"
                            )
                            standard_bytecode_path = full_path_obj
                            break
            if not standard_bytecode_path:
                logger.error(
                    "[!] Something went wrong. remap could not find a standard compiled version of this megafile."
                )
                sys.exit(1)
        remappings, version = megafile_remap(standard_bytecode_path,
                                             remapped_bytecode_path)
        remapping_method = "Megafile"
    elif args.opcode_file:
        remappings, version = opcode_constants_remap(remapped_bytecode_path,
                                                     provided_version=version)
        remapping_method = "opcode.pyc constants-walking"
    elif args.standard_bytecode_path:
        standard_bytecode_path: pathlib.Path = pathlib.Path(
            args.standard_bytecode_path).resolve()
        utils.check_read_access(standard_bytecode_path)
        utils.check_read_access(remapped_bytecode_path)
        utils.check_write_access(output_dir)
        if not remapped_bytecode_path.is_dir():
            raise ValueError(
                "The standard/default remapping method requires a directory containing Python bytecode files"
            )
        if not standard_bytecode_path.is_dir():
            raise ValueError(
                "If you are going to provide your own reference opcode set, it must be a directory of "
                "Python bytecode files")
        remappings, version = standard_pyc_remap(standard_bytecode_path,
                                                 remapped_bytecode_path,
                                                 version=version)
        remapping_method = "Diff'ing against standard library bytecode"
    elif args.check_remapping:
        # Here, remapped_bytecode_path is not actually bytecode; it's a
        # remapping file.
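        # An illustrative sketch of the expected remapping-file shape; only
        # the keys read below are shown, so this is not a complete schema:
        #   {
        #     "python_version": "3.7",
        #     "remapped_opcodes": [
        #       {"opname": "POP_TOP", "remapped_value": 5},
        #       ...
        #     ]
        #   }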
        utils.check_read_access(remapped_bytecode_path)
        remapping_file: TextIO
        with remapped_bytecode_path.open() as remapping_file:
            try:
                remapping_json: Dict[str, Union[str, int]] = json.loads(
                    remapping_file.read())
            except json.decoder.JSONDecodeError as e:
                logger.error(f"Could not read remapping file with error: {e}")
                sys.exit(1)
            version = remapping_json["python_version"]
            remappings_list: List[Dict[str, Union[
                bool, str, int]]] = remapping_json["remapped_opcodes"]
            remapping_dict: Dict[str, int] = {
                d["opname"]: d["remapped_value"]
                for d in remappings_list
            }
            if bytecode.validate_opmap(version, remapping_dict):
                logger.info("[*] This opmap is valid.")
                return
            else:
                msg: str = "This opmap is not valid."
                if not logging_options["verbose"]:
                    msg += " Run with --verbose flag for more information."
                logger.warning(f"[!] {msg}")
                sys.exit(1)

    if remappings:
        remappings: Dict[int, int] = fix_remapping_conflicts(remappings)
        remappings: Dict[int,
                         Tuple[int,
                               bool]] = fill_opmap_gaps(remappings, version)
        output_file_path: pathlib.Path = write_remapping_file(
            remappings, version, remapping_method, cli, output_dir=output_dir)
        logger.info(
            f"[*] Remapping file {output_file_path.name} written to {output_file_path.parent}."
        )

        # If we successfully produced the remapping file, we want to also
        # include the logged output of remap.
        log_name: str = datetime.datetime.now().strftime(
            "log_%H_%M_%S_%b_%d_%Y.txt")
        log_file_ptr: TextIO
        with output_dir.joinpath(log_name).open("w") as log_file_ptr:
            log_file_ptr.write(log_stream.getvalue())
        logging_options: Dict[str, Union[bool, os.PathLike]] = {
            "log_path": output_dir.joinpath(log_name)
        }
        pydecipher.set_logging_options(**logging_options)
    else:
        logger.warning(
            "[!] Remap couldn't produce the new opmap. Run with --verbose for more information."
        )
        sys.exit(1)
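Because run() takes its options as a list, the remap command can be driven from Python as well as from a shell. A hedged invocation sketch: the flag spellings (--megafile, --version, --output) are inferred from the argparse attribute names used above and may not match the real CLI, and the module path is an assumption:

from pydecipher.remap import run  # module path assumed

run([
    "--megafile", "3.7",        # version string: a bundled reference megafile is located by magic number
    "--version", "3.7",
    "--output", "remap_results",
    "remapped_megafile.pyc",    # becomes args.remapped_bytecode_path
])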
Example #5
def run(args_in: List[str] = None) -> None:
    """Orchestrate the flow of the pydecipher command.

    This function is the entry-point of the pydecipher command.  It calls out to
    other routines and generally attempts to follow this high-level flow:

        1.  Parse program arguments.
        2.  Check that input files are readable and output locations are
            writeable, including that the program is running in a
            sufficiently new Python environment (3.8 or later).
        3.  Recursively call unpack on the artifact until all items of
            interest are extracted.
        4.  Decompile any Python bytecode found through the unpacking
            process.

    Parameters
    ----------
    args_in : List[str]
        If this function is being called from other Python code, pydecipher
        flags and other command-line options can be passed in as a list.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(args_in)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    artifact_path: pathlib.Path = pathlib.Path(args.artifact_path).resolve()
    utils.check_read_access(artifact_path)

    relocate_pys: bool = False
    pyc_files: Iterable[os.PathLike] = []
    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
        if artifact_path.is_dir():
            relocate_pys = True
    elif artifact_path.is_dir():
        output_dir = artifact_path
        relocate_pys = True
    else:
        output_dir: pathlib.Path = (
            pathlib.Path.cwd() /
            f"pydecipher_output_{utils.slugify(artifact_path.name.split('.')[0])}"
        )

    if artifact_path.is_file() and os.path.splitext(
            artifact_path)[1].lower() in (".pyc", ".pyo"):
        relocate_pys = True
        pyc_files = [artifact_path]

    # The following block sets up logging to a StringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file
    # because we don't want to leave a log file on disk unless the program
    # succeeds, at least past the 'unpack' call.
    log_stream: io.StringIO = io.StringIO()
    log_stream__handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream__handler.setFormatter(pydecipher.log_format)
    log_stream__handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream__handler)

    version_hint: str = args.version_hint

    alternate_opmap: Dict[str, int] = None
    if args.remapping_file:
        remap_file: pathlib.Path = pathlib.Path(args.remapping_file).resolve()
        logger.info(f"[*] Using remap file {remap_file}")
        utils.check_read_access(remap_file)
        alternate_opmap: Dict[str, int] = bytecode.create_opmap_from_file(
            remap_file)

        with remap_file.open("r") as remapping_file:
            file_json: Dict[str, Union[str, int]] = json.loads(remapping_file.read())
            remap_file_version: str = file_json["python_version"]
            version_hint = remap_file_version

    utils.check_write_access(output_dir)
    # Dump all pyc files
    if artifact_path.is_dir():
        kwargs: Dict[str, str] = {"version_hint": version_hint}
        dirpath: str
        dirnames: List[str]
        filenames: List[str]
        for (dirpath, dirnames, filenames) in os.walk(artifact_path):
            filename: str
            for filename in filenames:
                if os.path.splitext(filename)[1].lower() in (".pyc", ".pyo"):
                    full_path: pathlib.Path = pathlib.Path(dirpath).joinpath(
                        filename)
                    try:
                        pyc_class_obj: artifact_types.pyc.Pyc = artifact_types.pyc.Pyc(
                            full_path, output_dir=full_path.parent, **kwargs)
                    except TypeError:
                        pass
                    else:
                        pyc_class_obj.unpack()
        pyc_files: List[pathlib.Path] = list(
            artifact_path.rglob("*.[pP][yY][cCoO]"))
    else:
        unpack(artifact_path,
               output_dir=str(output_dir),
               version_hint=version_hint)

    # If we produced files, we want to also include the logged output of
    # pydecipher. If we didn't produce anything, we can assume the program
    # failed/had uninteresting output that doesn't need to be kept. The one
    # exception to this is when we pass in a single pyc file, or a directory of
    # pyc files, to be decompiled.
    if (output_dir.exists() and os.listdir(output_dir)) or pyc_files:
        output_dir.mkdir(parents=True, exist_ok=True)
        log_name: str = datetime.datetime.now().strftime(
            "log_%H_%M_%S_%b_%d_%Y.txt")
        with output_dir.joinpath(log_name).open("w") as log_file_ptr:
            log_file_ptr.write(log_stream.getvalue())
        logging_options: Dict[str, pathlib.Path] = {
            "log_path": output_dir.joinpath(log_name)
        }
        pydecipher.set_logging_options(**logging_options)
    else:
        logger.warning("[!] This artifact produced no additional output.")
        return

    # Determine which pyc files to decompile
    if not pyc_files:
        pyc_files: Generator[os.PathLike, None,
                             None] = output_dir.rglob("*.[pP][yY][cCoO]")
        if not args.decompile_all:
            max_depth: int = 10
            # Search output directory with increasing recursive depth to find
            # first level of directories with .pyc files
            depth: int
            for depth in range(max_depth):
                tmp: List[os.PathLike] = list(
                    pydecipher.utils.rglob_limit_depth(output_dir,
                                                       "*.[pP][yY][cCoO]",
                                                       depth))
                if tmp:
                    pyc_files = tmp
                    break

    # Dispatch a pool of processes to decompile the specified group of pyc files
    bytecode.process_pycs(pyc_files, alternate_opmap=alternate_opmap)

    # If any decompiled python needs to be moved to the output directory, do
    # that now. This will only happen if the user passed in a pyc artifact
    # (single file or dir). We decompile the .pyc file into a .py file alongside
    # the .pyc file on disk, then move it to the designated output directory.
    if artifact_path.is_file():
        relative_root: pathlib.Path = artifact_path.parent
    else:
        relative_root: pathlib.Path = artifact_path
    if relocate_pys:
        pyc_file: pathlib.Path
        for pyc_file in pyc_files:
            py_file: pathlib.Path = pathlib.Path(str(pyc_file)[:-1])
            if not py_file.exists():
                continue
            rel_path: pathlib.Path = py_file.relative_to(relative_root)
            new_filepath: pathlib.Path = output_dir.joinpath(rel_path)
            py_file.rename(new_filepath)

    # Perform any cleanup functions on output of decompilation
    pydecipher.artifact_types.py2exe.PYTHONSCRIPT.cleanup(output_dir)
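The pydecipher entry point follows the same pattern. A hedged sketch: the module path and the --remapping-file flag spelling are inferred from args.remapping_file and may differ from the real CLI:

from pydecipher.main import run  # module path assumed

# Unpack a frozen executable and decompile recovered bytecode with a
# previously generated opcode remapping.
run([
    "--remapping-file", "remap_results/remapping.json",  # file name assumed
    "frozen_app.exe",
])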