Example #1
def prepare():
    """Prepare tagger for run.

    This should be run after installation to initialize the tagger's resources.
    """
    import nltk
    import requests
    from libarchive import extract_memory
    import os
    from shutil import move
    from f8a_tagger.utils import get_files_dir
    # assumed import path for the exception raised below
    from f8a_tagger.errors import RemoteDependencyMissingError

    nltk.download("punkt")
    nltk.download("wordnet")

    maven_index_checker_url = 'https://github.com/fabric8-analytics/' \
                              'maven-index-checker/files/1275145/' \
                              'maven-index-checker-v0.1-alpha.zip'
    response = requests.get(maven_index_checker_url)
    if not response.ok:
        raise RemoteDependencyMissingError(
            "Failed to download maven-index-checker, "
            "response code %s" % response.status_code)

    # Unfortunately there is no way to know the name or path of the extracted
    # file, so assume it's maven-index-checker.jar
    jar_name = "maven-index-checker.jar"

    jar_path = get_files_dir()
    extract_memory(response.content)
    move(jar_name, os.path.join(jar_path, jar_name))
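Since extract_memory gives no handle on the extracted entries, the jar name above is hard-coded. When that assumption is too fragile, the entry names can be listed first with libarchive.memory_reader before extracting; a minimal sketch (the helper name extracted_names is ours):

import libarchive

def extracted_names(buf):
    """List the entry paths inside an in-memory archive without extracting it."""
    with libarchive.memory_reader(buf) as archive:
        return [entry.pathname for entry in archive]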
Example #2
def test_buffers(tmpdir):

    # Collect information on what should be in the archive
    tree = treestat('libarchive')

    # Create an archive of our libarchive/ directory
    buf = bytes(bytearray(1000000))
    with libarchive.memory_writer(buf, 'gnutar', 'xz') as archive:
        archive.add_files('libarchive/')

    # Read the archive and check that the data is correct
    with libarchive.memory_reader(buf) as archive:
        check_archive(archive, tree)

    # Extract the archive in tmpdir and check that the data is intact
    with in_dir(tmpdir.strpath):
        flags = EXTRACT_OWNER | EXTRACT_PERM | EXTRACT_TIME
        libarchive.extract_memory(buf, flags)
        tree2 = treestat('libarchive')
        assert tree2 == tree
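test_buffers leans on three helpers from python-libarchive-c's own test suite: treestat (snapshot a directory tree), check_archive (compare archive entries against the snapshot), and in_dir (a chdir context manager). A rough sketch of two of them, assuming treestat only needs to map file paths to sizes for the before/after comparison:

import contextlib
import os

@contextlib.contextmanager
def in_dir(dirpath):
    """Temporarily chdir into dirpath, restoring the old cwd on exit."""
    prev = os.getcwd()
    os.chdir(dirpath)
    try:
        yield
    finally:
        os.chdir(prev)

def treestat(d):
    """Map every file path under d to its size (a simplified stand-in)."""
    stats = {}
    for dirpath, dirnames, filenames in os.walk(d):
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            stats[fpath] = os.lstat(fpath).st_size
    return stats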
Example #4
    def archive_files(self, arch_file_bytes, arch_info, include_dirs=False):
        try:
            archive_type = ArchiveType(arch_info['type'])
        except Exception as ex:
            EnhancedOutput.print_error(
                "Missing fields in the config file: {}".format(ex))
            EnhancedOutput.print_warning("Returning original file.")
            EnhancedOutput.logging_error(
                "Error setting archive type: {}. Returning original file.".
                format(ex))
            return arch_file_bytes

        EnhancedOutput.print_size(arch_file_bytes)

        if len(arch_file_bytes) > archive_type.maxSize:
            EnhancedOutput.print_error("{} over allowed size".format(
                arch_info['type']))
            EnhancedOutput.logging_info("{} maxSize met {}".format(
                arch_info['type'], len(arch_file_bytes)))
            return arch_file_bytes

        tmp_dir = tempfile.mkdtemp()

        try:
            with in_dir(tmp_dir):
                flags = (libarchive.extract.EXTRACT_OWNER
                         | libarchive.extract.EXTRACT_PERM
                         | libarchive.extract.EXTRACT_TIME)
                libarchive.extract_memory(arch_file_bytes, flags)
        except Exception as exce:
            EnhancedOutput.print_error(
                "Can't extract file. Returning original one.")
            EnhancedOutput.logging_error(
                "Can't extract file: {}. Returning original one.".format(exce))
            return arch_file_bytes

        EnhancedOutput.print_info("{} file contents and info".format(
            arch_info['type']))
        EnhancedOutput.print_info("Compression: {}".format(
            arch_info['filter']))

        files_list = list()
        for dirname, dirnames, filenames in os.walk(tmp_dir):
            dirz = dirname.replace(tmp_dir, ".")
            print "\t{0}".format(dirz)
            if include_dirs:
                files_list.append(dirz)
            for f in filenames:
                fn = os.path.join(dirz, f)
                files_list.append(fn)
                print "\t{} {}".format(
                    fn,
                    os.lstat(os.path.join(dirname, f)).st_size)

        patch_count = 0
        patched = False
        tmp_archive = tempfile.NamedTemporaryFile()

        try:
            with libarchive.file_writer(tmp_archive.name, arch_info['format'],
                                        arch_info['filter']) as archive:
                for filename in files_list:
                    full_path = os.path.join(tmp_dir, filename)
                    EnhancedOutput.print_info(
                        ">>> Next file in archive: {}".format(filename))

                    if os.path.islink(
                            full_path) or not os.path.isfile(full_path):
                        EnhancedOutput.print_warning(
                            "{} is not a file, skipping.".format(filename))
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    if os.lstat(full_path).st_size >= long(self.file_size_max):
                        EnhancedOutput.print_warning(
                            "{} is too big, skipping.".format(filename))
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    # Check against keywords
                    keyword_check = False

                    if type(archive_type.blacklist) is str:
                        if archive_type.blacklist.lower() in filename.lower():
                            keyword_check = True
                    else:
                        for keyword in archive_type.blacklist:
                            if keyword.lower() in filename.lower():
                                keyword_check = True
                                break

                    if keyword_check is True:
                        EnhancedOutput.print_warning(
                            "Archive blacklist enforced!")
                        EnhancedOutput.logging_info(
                            "Archive blacklist enforced on {}".format(
                                filename))
                        continue

                    if patch_count >= archive_type.patchCount:
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        EnhancedOutput.logging_info(
                            "Met archive config patch count limit. Adding original file."
                        )
                    else:
                        # create the file on disk temporarily for binaryGrinder to run on it
                        tmp = tempfile.NamedTemporaryFile()
                        shutil.copyfile(full_path, tmp.name)
                        tmp.flush()
                        patch_result = self.binary_injector(tmp.name)
                        if patch_result:
                            patch_count += 1
                            file2 = os.path.join(self.staging_folder,
                                                 os.path.basename(tmp.name))
                            EnhancedOutput.print_info(
                                "Patching complete, adding to archive file.")
                            # let's move the backdoored file to the final location
                            shutil.copyfile(file2, full_path)
                            EnhancedOutput.logging_info(
                                "{} in archive patched, adding to final archive"
                                .format(filename))
                            os.remove(file2)
                            patched = True
                        else:
                            EnhancedOutput.print_error("Patching failed")
                            EnhancedOutput.logging_error(
                                "{} patching failed. Keeping original file.".
                                format(filename))

                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        tmp.close()

        except Exception as exc:
            EnhancedOutput.print_error(
                "Error while creating the archive: {}. Returning the original file."
                .format(exc))
            EnhancedOutput.logging_error(
                "Error while creating the archive: {}. Returning original file."
                .format(exc))
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        if patched is False:
            EnhancedOutput.print_info(
                "No files were patched. Forwarding original file")
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        with open(tmp_archive.name, 'r+b') as f:
            ret = f.read()

        # cleanup
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()

        EnhancedOutput.logging_info(
            "Patching complete for HOST: {} ({}), PATH: {}".format(
                self.flow.request.host, self.host_domain,
                self.flow.request.path))
        return ret
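Stripped of the patching and logging, archive_files above is an extract/re-pack round trip: extract_memory writes the entries into the current working directory, the tree is walked, and file_writer rebuilds an archive with the same format and filter. A minimal sketch of that skeleton (repack and its parameters are our names; error handling omitted):

import os
import shutil
import tempfile

import libarchive
from libarchive.extract import EXTRACT_OWNER, EXTRACT_PERM, EXTRACT_TIME

def repack(archive_bytes, fmt, filt, out_path):
    """Extract an in-memory archive into a temp dir and rewrite it to out_path.

    out_path should be absolute, since we change directory below.
    """
    tmp_dir = tempfile.mkdtemp()
    prev = os.getcwd()
    try:
        # extract_memory writes into the current working directory
        os.chdir(tmp_dir)
        libarchive.extract_memory(
            archive_bytes, EXTRACT_OWNER | EXTRACT_PERM | EXTRACT_TIME)
        # ... files could be inspected or modified here ...
        with libarchive.file_writer(out_path, fmt, filt) as archive:
            archive.add_files('.')
    finally:
        os.chdir(prev)
        shutil.rmtree(tmp_dir, ignore_errors=True)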
Example #5
    def archive_files(self, arch_file_bytes, arch_info, include_dirs=False):
        try:
            archive_type = ArchiveType(arch_info["type"])
        except Exception as ex:
            return arch_file_bytes

        if len(arch_file_bytes) > archive_type.maxSize:
            return arch_file_bytes

        tmp_dir = tempfile.mkdtemp()

        try:
            with in_dir(tmp_dir):
                flags = (
                    libarchive.extract.EXTRACT_OWNER | libarchive.extract.EXTRACT_PERM | libarchive.extract.EXTRACT_TIME
                )
                libarchive.extract_memory(arch_file_bytes, flags)
        except Exception as exce:
            return arch_file_bytes

        files_list = list()
        for dirname, dirnames, filenames in os.walk(tmp_dir):
            dirz = dirname.replace(tmp_dir, ".")
            if include_dirs:
                files_list.append(dirz)
            for f in filenames:
                fn = os.path.join(dirz, f)
                files_list.append(fn)

        patch_count = 0
        patched = False
        tmp_archive = tempfile.NamedTemporaryFile()

        try:
            with libarchive.file_writer(tmp_archive.name, arch_info["format"], arch_info["filter"]) as archive:
                for filename in files_list:
                    full_path = os.path.join(tmp_dir, filename)
                    if os.path.islink(full_path) or not os.path.isfile(full_path):
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    if os.lstat(full_path).st_size >= long(self.file_size_max):
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    # Check against keywords
                    keyword_check = False

                    if type(archive_type.blacklist) is str:
                        if archive_type.blacklist.lower() in filename.lower():
                            keyword_check = True
                    else:
                        for keyword in archive_type.blacklist:
                            if keyword.lower() in filename.lower():
                                keyword_check = True
                                break

                    if keyword_check is True:
                        continue

                    if patch_count >= archive_type.patchCount:
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                    else:
                        # create the file on disk temporarily for binaryGrinder to run on it
                        tmp = tempfile.NamedTemporaryFile()
                        shutil.copyfile(full_path, tmp.name)
                        tmp.flush()

                        with stdout_redirect(StringIO.StringIO()) as new_stdout:
                            patch_result = self.binary_injector(tmp.name)
                        if patch_result:
                            patch_count += 1
                            file2 = os.path.join(self.staging_folder, os.path.basename(tmp.name))
                            # let's move the backdoored file to the final location
                            shutil.copyfile(file2, full_path)
                            os.remove(file2)
                            patched = True
                            self.context.log("Patching {}: done".format(filename))
                        else:
                            self.context.log("Patching {}: failed".format(filename), level="error")

                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        tmp.close()

        except Exception as exc:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        if patched is False:
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        with open(tmp_archive.name, "r+b") as f:
            ret = f.read()

        # cleanup
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()

        return ret
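The snippet above silences binary_injector's output with a stdout_redirect context manager that is not shown here. A plausible implementation, assuming it simply swaps sys.stdout for the given stream:

import contextlib
import sys

@contextlib.contextmanager
def stdout_redirect(stream):
    """Route sys.stdout into stream for the duration of the with-block."""
    saved = sys.stdout
    sys.stdout = stream
    try:
        yield stream
    finally:
        sys.stdout = saved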
Example #6
    def archive_files(self, arch_file_bytes, arch_info, include_dirs=False):
        try:
            archive_type = ArchiveType(arch_info['type'])
        except Exception as ex:
            EnhancedOutput.print_error("Missing fields in the config file: {}".format(ex))
            EnhancedOutput.print_warning("Returning original file.")
            EnhancedOutput.logging_error("Error setting archive type: {}. Returning original file.".format(ex))
            return arch_file_bytes

        EnhancedOutput.print_size(arch_file_bytes)

        if len(arch_file_bytes) > archive_type.maxSize:
            EnhancedOutput.print_error("{} over allowed size".format(arch_info['type']))
            EnhancedOutput.logging_info("{} maxSize met {}".format(arch_info['type'], len(arch_file_bytes)))
            return arch_file_bytes

        tmp_dir = tempfile.mkdtemp()

        try:
            with in_dir(tmp_dir):
                flags = (libarchive.extract.EXTRACT_OWNER
                         | libarchive.extract.EXTRACT_PERM
                         | libarchive.extract.EXTRACT_TIME)
                libarchive.extract_memory(arch_file_bytes, flags)
        except Exception as exce:
            EnhancedOutput.print_error("Can't extract file. Returning original one.")
            EnhancedOutput.logging_error("Can't extract file: {}. Returning original one.".format(exce))
            return arch_file_bytes

        EnhancedOutput.print_info("{} file contents and info".format(arch_info['type']))
        EnhancedOutput.print_info("Compression: {}".format(arch_info['filter']))

        files_list = list()
        for dirname, dirnames, filenames in os.walk(tmp_dir):
            dirz = dirname.replace(tmp_dir, ".")
            print "\t{0}".format(dirz)
            if include_dirs:
                files_list.append(dirz)
            for f in filenames:
                fn = os.path.join(dirz, f)
                files_list.append(fn)
                print "\t{} {}".format(fn, os.lstat(os.path.join(dirname, f)).st_size)

        patch_count = 0
        patched = False
        tmp_archive = tempfile.NamedTemporaryFile()

        try:
            with libarchive.file_writer(tmp_archive.name, arch_info['format'], arch_info['filter']) as archive:
                for filename in files_list:
                    full_path = os.path.join(tmp_dir, filename)
                    EnhancedOutput.print_info(">>> Next file in archive: {}".format(filename))

                    if os.path.islink(full_path) or not os.path.isfile(full_path):
                        EnhancedOutput.print_warning("{} is not a file, skipping.".format(filename))
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    if os.lstat(full_path).st_size >= long(self.file_size_max):
                        EnhancedOutput.print_warning("{} is too big, skipping.".format(filename))
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        continue

                    # Check against keywords
                    keyword_check = False

                    if type(archive_type.blacklist) is str:
                        if archive_type.blacklist.lower() in filename.lower():
                            keyword_check = True
                    else:
                        for keyword in archive_type.blacklist:
                            if keyword.lower() in filename.lower():
                                keyword_check = True
                                break

                    if keyword_check is True:
                        EnhancedOutput.print_warning("Archive blacklist enforced!")
                        EnhancedOutput.logging_info("Archive blacklist enforced on {}".format(filename))
                        continue

                    if patch_count >= archive_type.patchCount:
                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        EnhancedOutput.logging_info("Met archive config patch count limit. Adding original file.")
                    else:
                        # create the file on disk temporarily for binaryGrinder to run on it
                        tmp = tempfile.NamedTemporaryFile()
                        shutil.copyfile(full_path, tmp.name)
                        tmp.flush()
                        patch_result = self.binary_injector(tmp.name)
                        if patch_result:
                            patch_count += 1
                            file2 = os.path.join(self.staging_folder, os.path.basename(tmp.name))
                            EnhancedOutput.print_info("Patching complete, adding to archive file.")
                            # let's move the backdoored file to the final location
                            shutil.copyfile(file2, full_path)
                            EnhancedOutput.logging_info(
                                "{} in archive patched, adding to final archive".format(filename))
                            os.remove(file2)
                            patched = True
                        else:
                            EnhancedOutput.print_error("Patching failed")
                            EnhancedOutput.logging_error("{} patching failed. Keeping original file.".format(filename))

                        with in_dir(tmp_dir):
                            archive.add_files(filename)
                        tmp.close()

        except Exception as exc:
            EnhancedOutput.print_error(
                "Error while creating the archive: {}. Returning the original file.".format(exc))
            EnhancedOutput.logging_error("Error while creating the archive: {}. Returning original file.".format(exc))
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        if patched is False:
            EnhancedOutput.print_info("No files were patched. Forwarding original file")
            shutil.rmtree(tmp_dir, ignore_errors=True)
            tmp_archive.close()
            return arch_file_bytes

        with open(tmp_archive.name, 'r+b') as f:
            ret = f.read()

        # cleanup
        shutil.rmtree(tmp_dir, ignore_errors=True)
        tmp_archive.close()

        EnhancedOutput.logging_info(
            "Patching complete for HOST: {} ({}), PATH: {}".format(self.flow.request.host, self.host_domain,
                                                                   self.flow.request.path))
        return ret