Example #1
File: bech32.py  Project: espoem/MetExt
    def run(cls, _input: Decodable,
            **kwargs) -> Union[Tuple[None, None], Tuple[str, List[int]]]:
        """Decodes Bech32 encoded bytes-like object or ASCII data string.

        :param _input: Data string to decode
        :param kwargs: Arbitrary keyword arguments
        :return: Tuple (hrp, data) with the human-readable part and the decoded data.
            Returns (None, None) if `_input` is not a valid Bech32-encoded (bytes) string.
        """
        try:
            if not isinstance(_input, str):
                _input = str_from_bytes(_input)
        except Exception:
            return None, None

        max_length = kwargs.get("max_length", 90)
        # BIP-0173: only printable ASCII (ordinals 33-126) is allowed, and mixed case is invalid.
        if any(ord(x) < 33 or ord(x) > 126 for x in _input) or (
                _input.lower() != _input and _input.upper() != _input):
            return None, None
        _input = _input.lower()
        pos = _input.rfind("1")
        if pos < 1 or pos + 7 > len(_input) or len(_input) > max_length:
            return None, None
        if any(x not in CHARSET for x in _input[pos + 1:]):
            return None, None
        hrp = _input[:pos]
        result = [CHARSET.find(x) for x in _input[pos + 1:]]
        if not cls.verify_checksum(hrp, result):
            return None, None
        return hrp, result[:-6]
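
The method above delegates the actual checksum test to cls.verify_checksum, which this example does not show. For reference, a minimal standalone sketch of the standard BIP-0173 checksum routine is given below; the helper names bech32_polymod and bech32_hrp_expand follow the BIP-0173 reference code and are not necessarily the ones used in this project.

GENERATOR = [0x3B6A57B2, 0x26508E6D, 0x1EA119FA, 0x3D4233DD, 0x2A1462B3]

def bech32_polymod(values):
    # BCH checksum over the 5-bit value sequence, as specified in BIP-0173.
    chk = 1
    for value in values:
        top = chk >> 25
        chk = (chk & 0x1FFFFFF) << 5 ^ value
        for i in range(5):
            chk ^= GENERATOR[i] if ((top >> i) & 1) else 0
    return chk

def bech32_hrp_expand(hrp):
    # The human-readable part enters the checksum as its high bits, a zero, then its low bits.
    return [ord(x) >> 5 for x in hrp] + [0] + [ord(x) & 31 for x in hrp]

def verify_checksum(hrp, data):
    # A valid Bech32 string has a polymod of exactly 1 over the expanded hrp plus data.
    return bech32_polymod(bech32_hrp_expand(hrp) + data) == 1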
Example #2
    def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
        """Decodes Base91 encoded bytes-like object or ASCII string.

        See http://base91.sourceforge.net/

        :param _input: Base91 encoded (bytes) string
        :param kwargs:
        :keyword charset: Optional custom alphabet of 91 characters
        :return: `None` if `_input` couldn't be decoded, else decoded bytes string
        """
        charset = kwargs.get("charset", CHARSET)
        assert len(charset) == 91

        try:
            if not isinstance(_input, str):
                _input = str_from_bytes(_input).strip()
        except Exception:
            return None

        # The Base91 alphabet contains regex metacharacters (e.g. "[", "]", "^"),
        # so the character class must be built from an escaped charset.
        if re.search("[^" + re.escape(charset) + "]", _input) is not None:
            return None

        if charset != CHARSET:
            _input = _input.translate(str.maketrans(charset, CHARSET))

        try:
            return convert_to_bytes(base91.decode(_input))
        except Exception:
            return None
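
The noteworthy step above is the alphabet translation: input written in a custom 91-character alphabet is remapped character-for-character onto the canonical one with str.maketrans before the stock base91 decoder runs. A minimal standalone sketch of the same idea, shown with Base64 instead of Base91 so it needs only the standard library (the reversed alphabet is a made-up stand-in for a custom charset):

import base64
import string

STD = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/"
CUSTOM = STD[::-1]  # hypothetical custom alphabet: the same 64 symbols, reversed

def decode_custom_b64(data: str) -> bytes:
    # Map the custom alphabet back onto the standard one, then decode as usual.
    return base64.b64decode(data.translate(str.maketrans(CUSTOM, STD)))

# Round trip: encode normally, rewrite into the custom alphabet, decode through the remapping.
encoded = base64.b64encode(b"hello").decode().translate(str.maketrans(STD, CUSTOM))
assert decode_custom_b64(encoded) == b"hello"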
Example #3
def extract_patterns(data: str, extractor: str, **kwargs) -> List[Any]:
    """Finds patterns in input data via selected extractor.
    The type of pattern is defined by the extractor used.
    The extractor must be registered, i.e. it must be listed with :func:`list_extractors`.

    :param data: Data in which to look for patterns
    :param extractor: Name of a registered active extractor
    :param kwargs: Arbitrary keyword arguments for the extractor
    :return: List of found patterns
    """
    if not __is_supported_extractor(extractor):
        raise ValueError("Invalid extractor name. Supported values: {}".format(
            list_extractors_names(active_only=True)))

    if not isinstance(data, str):
        data = str_from_bytes(data)

    return list(get_extractor(extractor)(data, **kwargs))
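
extract_patterns is a thin dispatch layer over a registry of extractor callables. The registry itself is not part of this example; a minimal standalone sketch of the same pattern (the _EXTRACTORS dict and the "digits" extractor below are illustrative, not MetExt's actual internals) could look like this:

import re
from typing import Any, Callable, Dict, Iterable, List

# Hypothetical registry: extractor name -> callable yielding matched values.
_EXTRACTORS: Dict[str, Callable[..., Iterable[Any]]] = {
    "digits": lambda text, **kw: re.findall(r"\d+", text),
}

def extract(data: str, extractor: str, **kwargs) -> List[Any]:
    if extractor not in _EXTRACTORS:
        raise ValueError("Invalid extractor name. Supported values: {}".format(
            sorted(_EXTRACTORS)))
    return list(_EXTRACTORS[extractor](data, **kwargs))

print(extract("port 80 and 443", "digits"))  # ['80', '443']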
Example #4
    def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
        """Decodes Base32 encoded bytes-like object or ASCII `data` string
        using the chars set and rules as defined by Douglas Crockford.

        See https://www.crockford.com/base32.html

        :param _input: Base32 encoded (bytes) string
        :param kwargs: Arbitrary keyword arguments
        :return: `None` if `_input` couldn't be decoded, else decoded byte string
        """
        try:
            if not isinstance(_input, str):
                _input = str_from_bytes(_input)

            if (re.search(r"[^0123456789ABCDEFGHJKMNPQRSTVWXYZ]",
                          _input.upper()) is not None):
                return None
            decoded = base32_crockford.decode(_input, strict=True)
            return decoded.to_bytes((decoded.bit_length() + 7) // 8,
                                    byteorder="big")
        except Exception:
            return None
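
base32_crockford.decode returns an integer, so the last two lines pack that integer into the smallest big-endian byte string that can hold it. The conversion is easy to verify in isolation with the standard library alone (the sample value below is arbitrary):

decoded = 0x68656C6C6F  # an arbitrary integer whose big-endian bytes spell "hello"
# (bit_length() + 7) // 8 is the minimum number of bytes needed for the integer.
packed = decoded.to_bytes((decoded.bit_length() + 7) // 8, byteorder="big")
assert packed == b"hello"
# Note the edge case: a decoded value of 0 has bit_length() 0 and packs to b"".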
Example #5
def analyse(_input: Union[FileInputExtended, BytesIO, StringIO, str, bytes],
            decoders: Union[List[Tuple[str, dict]], List[str], str] = None,
            extractors: Union[List[Tuple[str, dict]], List[str], str] = None,
            **kwargs) -> List[dict]:
    """Common function to apply multiple decoders and multiple extractors on the input.
    Tries to decompress data first if recognized compression is applied.

    :param _input: File-like input (text or binary), see :func:`input_for_analysis` to create a suitable input.
    :param decoders: List of decoders as (`decoder_name`, `decoder_kwargs`) tuples to apply
    :param extractors: List of extractors as (`extractor_name`, `extractor_kwargs`) tuples to apply
    :return: List of dictionaries with the results for each input source
    """
    def __read(_value):
        if isinstance(_value, str):
            _value = StringIO(_value)
        elif isinstance(_value, (bytes, bytearray)):
            _value = BytesIO(_value)
        if isinstance(_value, list):
            _value = FileInputExtended(_value, mode="rb")
        if isinstance(_value, FileInputExtended):
            return _value.read()
        return (_value.read(), )

    def __add_patterns_to_out(_source: str, _format: str, _patterns: dict,
                              _out: dict):
        item = _out.setdefault(_source, {})
        item.setdefault("name", _source)
        item_formats = item.setdefault("formats", {})
        if not _patterns and _format not in item_formats:
            item_formats[_format] = None
            return
        item_formats[_format] = item_formats.get(_format) or {}
        for k, v in _patterns.items():
            item_formats[_format].setdefault("patterns",
                                             {}).setdefault(k, []).extend(v)

    if not decoders or decoders in ["auto", "all"]:
        decoders = [(dec_name, {}) for dec_name in list_decoders().keys()]
    if isinstance(decoders, str):
        decoders = [(decoders, {})]
    decoders = [d if isinstance(d, tuple) else (d, {}) for d in decoders]

    if not extractors or extractors in ["auto", "all"]:
        extractors = [(ex_name, {}) for ex_name in list_extractors().keys()]
    if isinstance(extractors, str):
        extractors = [(extractors, {})]
    extractors = [e if isinstance(e, tuple) else (e, {}) for e in extractors]

    exclusive_decoders_dict = __create_decoders_exclusivity()
    out = {}

    max_workers = kwargs.get("max_workers", None)
    if max_workers is None:
        max_workers = max(min(len(extractors), (os.cpu_count() or 2) - 1), 1)
    with cf.ProcessPoolExecutor(max_workers=max_workers) as e:
        success_decode_extract = {}
        for data_read in __read(_input):
            try:
                source_name = _input.name
            except Exception:
                source_name = "<data>"
            dl, dec_name_pre = __decompress_to_data_list(data_read)
            for data in dl:
                for dec in decoders:
                    dec_name, dec_kwargs = dec
                    skip_decoder = bool(
                        exclusive_decoders_dict.get(dec_name, set())
                        & success_decode_extract.get(source_name, set()))
                    if skip_decoder:
                        continue

                    data_list, dec_name_post = __decompress_to_data_list(
                        decode(data, dec_name, **dec_kwargs))
                    for decoded_data in data_list:
                        if not decoded_data:
                            continue
                        patterns = {}
                        future_extracted = {
                            e.submit(
                                __extract_single,
                                str_from_bytes(decoded_data),
                                extractor,
                            ): extractor[0]
                            for extractor in extractors
                        }
                        for future in cf.as_completed(future_extracted):
                            pattern_type = future_extracted[future]
                            try:
                                result = future.result()
                                if result:
                                    patterns[pattern_type] = result
                                    success_decode_extract.setdefault(
                                        source_name, set()).add(dec_name)
                            except Exception:
                                pass
                        dec_name_final = "+".join(
                            n for n in (dec_name_pre, dec_name, dec_name_post) if n)
                        __add_patterns_to_out(source_name, dec_name_final,
                                              patterns, out)
            if source_name not in out:
                out[source_name] = {
                    "name": source_name,
                    "message": "Couldn't decode data nor find any patterns.",
                }

    return list(out.values())
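
The extractor fan-out inside the inner loop is the usual concurrent.futures pattern: submit one job per extractor, remember which future belongs to which extractor, and collect results with as_completed while ignoring workers that raise. A stripped-down standalone sketch of just that pattern is shown below; the _find worker is a stand-in, not MetExt's __extract_single.

import concurrent.futures as cf

def _find(text: str, token: str) -> list:
    # Stand-in worker: every index at which `token` occurs in `text`.
    return [i for i in range(len(text)) if text.startswith(token, i)]

def fan_out(text: str, tokens: list) -> dict:
    results = {}
    with cf.ProcessPoolExecutor(max_workers=2) as pool:
        futures = {pool.submit(_find, text, t): t for t in tokens}
        for future in cf.as_completed(futures):
            name = futures[future]
            try:
                hits = future.result()
            except Exception:
                continue  # a failing worker should not sink the whole run
            if hits:
                results[name] = hits
    return results

if __name__ == "__main__":  # guard required where worker processes are spawned
    print(fan_out("abcabc", ["abc", "zzz"]))  # {'abc': [0, 3]}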
Example #6
File: __init__.py  Project: espoem/MetExt
def _extract_with_regex(
    _input,
    regex,
    validator=None,
    per_line=True,
    preprocess=None,
    postprocess=None,
    cached_values=None,
    data_kind=None,
    include_original=True,
    include_contexts=True,
    context_length=30,
):
    def create_item(
        value_,
        position_=None,
        original_=None,
        value_kind_=None,
        context_=None,
        **kwargs
    ):
        res = {"value": value_}
        if position_ is not None:
            res.update({"position": position_})
        if original_ is not None:
            res.update({"original": original_})
        if value_kind_ is not None:
            res.update({"value_kind": value_kind_})
        if context_ is not None:
            res.update({"context": context_})
        if kwargs:
            res.update(kwargs)
        return res

    def add_update_item_to_out(item):
        h = hashlib.sha1()
        h.update(convert_to_bytes(item["value"]))
        key_ = h.hexdigest()
        if key_ not in extracted_values:
            extracted_values[key_] = item
        if "frequency" not in extracted_values[key_]:
            extracted_values[key_]["frequency"] = 0
        extracted_values[key_]["frequency"] += 1
        if "positions" not in extracted_values[key_]:
            extracted_values[key_]["positions"] = []
        if item.get("position"):
            extracted_values[key_]["positions"].append(item["position"])
        if "position" in extracted_values[key_]:
            del extracted_values[key_]["position"]
        if "contexts" not in extracted_values[key_]:
            extracted_values[key_]["contexts"] = set()
        if item.get("context"):
            extracted_values[key_]["contexts"].add(item["context"])
        if "context" in extracted_values[key_]:
            del extracted_values[key_]["context"]

    if not isinstance(_input, str):
        try:
            _input = str_from_bytes(_input)
        except Exception:
            # The input cannot be decoded to text; emit nothing and stop the generator.
            return

    if cached_values is None:
        cached_values = set()

    cur_pos = 0
    extracted_values = {}
    for part in _input.splitlines(keepends=True) if per_line else [_input]:
        if preprocess is not None:
            part = preprocess(part)
        for match in regex.finditer(part):
            value = match.group(0)
            if postprocess is not None:
                value = postprocess(value)
            orig_value = (
                match.group(0) if include_original and match.group(0) != value else None
            )
            context = None
            if include_contexts:
                context = (
                    _input[
                        max(cur_pos + match.start(0) - context_length, 0) : cur_pos
                        + match.start(0)
                    ]
                    + ">>>>value<<<<"
                    + _input[
                        cur_pos + match.end(0) : cur_pos + match.end(0) + context_length
                    ]
                )
            if cached_values is not None and value in cached_values:
                add_update_item_to_out(
                    create_item(
                        value,
                        cur_pos + match.start(0),
                        orig_value,
                        value_kind_=data_kind,
                        context_=context,
                    )
                )
                continue
            try:
                if validator is not None and not validator(value):
                    continue
            except Exception:
                continue
            add_update_item_to_out(
                create_item(
                    value,
                    cur_pos + match.start(0),
                    orig_value,
                    value_kind_=data_kind,
                    context_=context,
                )
            )

            if isinstance(cached_values, list):
                cached_values.append(value)
            if isinstance(cached_values, set):
                cached_values.add(value)
        cur_pos += len(part)
    yield from sorted(
        extracted_values.values(), key=lambda x: x.get("frequency"), reverse=True
    )
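
The generator above deduplicates matches by hashing each value with SHA-1 and aggregating frequency, positions, and contexts under that key before yielding the items sorted by frequency. A compact standalone sketch of the same aggregation (the field names mirror the ones above; the sample input and pattern are made up):

import hashlib
import re

def aggregate(text: str, pattern: str) -> list:
    found = {}
    for match in re.finditer(pattern, text):
        value = match.group(0)
        key = hashlib.sha1(value.encode()).hexdigest()
        item = found.setdefault(key, {"value": value, "frequency": 0, "positions": []})
        item["frequency"] += 1
        item["positions"].append(match.start())
    # Most frequent values first, like the generator above.
    return sorted(found.values(), key=lambda x: x["frequency"], reverse=True)

print(aggregate("10.0.0.1 hit 10.0.0.1 then 10.0.0.2", r"\d+\.\d+\.\d+\.\d+"))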