def run(cls, _input: Decodable, **kwargs) -> Union[Tuple[None, None], Tuple[str, List[int]]]:
    """Decodes Bech32 encoded bytes-like object or ASCII data string.

    :param _input: Data string to decode
    :param kwargs:
        :keyword max_length: Maximum accepted length of the whole encoded
            string (default: 90)
    :return: Tuple (hrp, data) with human-readable part and decoded data.
        Returns (None, None) if `data` is not valid bech32 encoded (bytes) string.
    """
    failure = (None, None)
    try:
        if not isinstance(_input, str):
            _input = str_from_bytes(_input)
    except Exception:
        return failure
    max_length = kwargs.get("max_length", 90)
    # Only printable ASCII is allowed, and casing must not be mixed.
    if not all(33 <= ord(ch) <= 126 for ch in _input):
        return failure
    if _input.lower() != _input and _input.upper() != _input:
        return failure
    _input = _input.lower()
    separator = _input.rfind("1")
    # The separator must leave a non-empty HRP before it and at least the
    # 6-character checksum after it; total length is capped by max_length.
    if separator < 1 or separator + 7 > len(_input) or len(_input) > max_length:
        return failure
    payload = _input[separator + 1:]
    if not all(ch in CHARSET for ch in payload):
        return failure
    hrp = _input[:separator]
    values = [CHARSET.find(ch) for ch in payload]
    if not cls.verify_checksum(hrp, values):
        return failure
    # Strip the 6 checksum symbols from the returned data.
    return hrp, values[:-6]
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decodes Base91 encoded bytes-like object or ASCII string.

    See http://base91.sourceforge.net/

    :param _input: Base91 encoded (bytes) string
    :param kwargs:
        :keyword charset: Optional custom alphabet of 91 characters
    :return: `None` if `_input` couldn't be decoded, else decoded bytes string
    :raises ValueError: If a custom `charset` does not contain exactly
        91 characters
    """
    charset = kwargs.get("charset", CHARSET)
    # Validate explicitly instead of `assert`, which is stripped under `-O`.
    if len(charset) != 91:
        raise ValueError("charset must contain exactly 91 characters")
    try:
        if not isinstance(_input, str):
            _input = str_from_bytes(_input).strip()
    except Exception:
        return None
    # BUGFIX: the previous `re.search("[^" + charset + "]", _input)` built a
    # character class directly from the alphabet; the default basE91 alphabet
    # contains "]", which closed the class early and silently disabled this
    # validation. A plain membership test has no such escaping pitfalls.
    allowed = frozenset(charset)
    if any(ch not in allowed for ch in _input):
        return None
    if charset != CHARSET:
        # Map the custom alphabet onto the canonical one before decoding.
        _input = _input.translate(str.maketrans(charset, CHARSET))
    try:
        return convert_to_bytes(base91.decode(_input))
    except Exception:
        return None
def extract_patterns(data: str, extractor: str, **kwargs) -> List[Any]:
    """Finds patterns in input data via selected extractor.

    The type of pattern is defined by the extractor used. The extractor must
    be registered, i.e. it must be listed with :func:`list_extractors`.

    :param data: Data in which to look for patterns
    :param extractor: Name of a registered active extractor
    :param kwargs: Arbitrary keyword arguments for the extractor
    :return: List of found patterns
    """
    if not __is_supported_extractor(extractor):
        supported = list_extractors_names(active_only=True)
        raise ValueError(
            "Invalid extractor name. Supported values: {}".format(supported))
    # Extractors operate on text; decode bytes-like input first.
    text = data if isinstance(data, str) else str_from_bytes(data)
    extractor_fn = get_extractor(extractor)
    return list(extractor_fn(text, **kwargs))
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decodes Base32 encoded bytes-like object or ASCII `data` string using
    the chars set and rules as defined by Douglas Crockford.

    See https://www.crockford.com/base32.html

    :param _input: Base32 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments
    :return: `None` if `data` couldn't be decoded, else decoded byte string
    """
    try:
        if not isinstance(_input, str):
            _input = str_from_bytes(_input)
        # Crockford's alphabet excludes I, L, O and U to avoid ambiguity;
        # any other character makes the input invalid.
        if (re.search(r"[^0123456789ABCDEFGHJKMNPQRSTVWXYZ]",
                      _input.upper()) is not None):
            return None
        decoded = base32_crockford.decode(_input, strict=True)
        # decode() returns an int; serialize it big-endian. An input decoding
        # to 0 has bit_length 0 and therefore yields b"".
        return decoded.to_bytes((decoded.bit_length() + 7) // 8,
                                byteorder="big")
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return None
def analyse(_input: Union[FileInputExtended, BytesIO, StringIO, str, bytes],
            decoders: Union[List[Tuple[str, dict]], List[str], str] = None,
            extractors: Union[List[Tuple[str, dict]], List[str], str] = None,
            **kwargs) -> List[dict]:
    """Common function to apply multiple decoders and multiple extractors on
    the input. Tries to decompress data first if recognized compression is
    applied.

    :param _input: File-like input (text or binary), see
        :func:`input_for_analysis` to create a suitable input.
    :param decoders: List of decoder (`decoder_name`, `decoder_args`,
        `decoder_kwargs`) to apply
    :param extractors: List of extractors (`extractor_name`, `extractor_args`,
        `extractor_kwargs`) to apply
    :param kwargs:
        :keyword max_workers: Maximum number of worker processes for the
            extractor pool (default: derived from CPU count)
    :return: List of dictionaries with the results for each input source
    """

    def __read(_value):
        # Normalize any supported input into an iterable of raw payloads.
        if isinstance(_value, str):
            _value = StringIO(_value)
        elif isinstance(_value, (bytes, bytearray)):
            _value = BytesIO(_value)
        if isinstance(_value, list):
            _value = FileInputExtended(_value, mode="rb")
        if isinstance(_value, FileInputExtended):
            # presumably one payload per file in the input -- TODO confirm
            return _value.read()
        return (_value.read(), )

    def __add_patterns_to_out(_source: str, _format: str, _patterns: dict, _out: dict):
        # Merge the patterns found for decoder chain `_format` into `_out`.
        item = _out.setdefault(_source, {})
        item.setdefault("name", _source)
        item_formats = item.setdefault("formats", {})
        if not _patterns and _format not in item_formats:
            # Record the attempted format with no findings.
            item_formats[_format] = None
            return
        item_formats[_format] = item_formats.get(_format) or {}
        for k, v in _patterns.items():
            item_formats[_format].setdefault("patterns", {}).setdefault(k, []).extend(v)

    # Normalize `decoders` into a list of (name, kwargs) tuples;
    # None/"auto"/"all" selects every registered decoder.
    if not decoders or decoders in ["auto", "all"]:
        decoders = [(dec_name, {}) for dec_name in list_decoders().keys()]
    if isinstance(decoders, str):
        decoders = [(decoders, {})]
    decoders = [d if isinstance(d, tuple) else (d, {}) for d in decoders]
    # Normalize `extractors` the same way.
    if not extractors or extractors in ["auto", "all"]:
        extractors = [(ex_name, {}) for ex_name in list_extractors().keys()]
    if isinstance(extractors, str):
        extractors = [(extractors, {})]
    extractors = [e if isinstance(e, tuple) else (e, {})
                  for e in extractors]
    exclusive_decoders_dict = __create_decoders_exclusivity()
    out = {}
    max_workers = kwargs.get("max_workers", None)
    if max_workers is None:
        # One worker per extractor, leaving one CPU free, but at least one.
        max_workers = max([min([len(extractors), os.cpu_count() - 1]), 1])
    with cf.ProcessPoolExecutor(max_workers=max_workers) as e:
        # Per-source set of decoders that already produced patterns, used to
        # skip decoders declared mutually exclusive with a successful one.
        success_decode_extract = {}
        for data_read in __read(_input):
            try:
                source_name = _input.name
            except:  # noqa -- raw data inputs have no `name` attribute
                source_name = "<data>"
            # Transparently undo any recognized compression on the raw input.
            dl, dec_name_pre = __decompress_to_data_list(data_read)
            for data in dl:
                for dec in decoders:
                    dec_name, dec_kwargs = dec
                    skip_decoder = bool(
                        exclusive_decoders_dict.get(dec_name, set())
                        & success_decode_extract.get(source_name, set()))
                    if skip_decoder:
                        continue
                    # Decode, then decompress again in case the decoded
                    # payload itself is compressed.
                    data_list, dec_name_post = __decompress_to_data_list(
                        decode(data, dec_name, **dec_kwargs))
                    for decoded_data in data_list:
                        if not decoded_data:
                            continue
                        patterns = {}
                        # Fan each extractor out to the process pool.
                        future_extracted = {
                            e.submit(
                                __extract_single,
                                str_from_bytes(decoded_data),
                                extractor,
                            ): extractor[0]
                            for extractor in extractors
                        }
                        for future in cf.as_completed(future_extracted):
                            pattern_type = future_extracted[future]
                            try:
                                result = future.result()
                                if result:
                                    patterns[pattern_type] = result
                                    # Remember that this decoder produced
                                    # patterns for this source.
                                    success_decode_extract.setdefault(
                                        source_name, set()).add(dec_name)
                            except:  # noqa -- a failed extractor contributes no patterns
                                pass
                        # Label results with the full decoder chain, e.g.
                        # "gzip+base64" (empty stages are omitted).
                        dec_name_final = "+".join(
                            n for n in (dec_name_pre, dec_name, dec_name_post)
                            if bool(n))
                        __add_patterns_to_out(source_name, dec_name_final,
                                              patterns, out)
            if source_name not in out:
                # Nothing at all was found for this source.
                out[source_name] = {
                    "name": source_name,
                    "message": "Couldn't decode data nor find any patterns.",
                }
    return list(out.values())
def _extract_with_regex(
    _input,
    regex,
    validator=None,
    per_line=True,
    preprocess=None,
    postprocess=None,
    cached_values=None,
    data_kind=None,
    include_original=True,
    include_contexts=True,
    context_length=30,
):
    """Generator that scans `_input` with `regex` and yields deduplicated
    match items sorted by descending frequency.

    :param _input: Text (or bytes decodable to text) to scan
    :param regex: Compiled regular expression to search with
    :param validator: Optional callable(value) -> bool; values for which it
        returns falsy or raises are dropped
    :param per_line: If True scan line by line, else the whole input at once
    :param preprocess: Optional callable applied to each part before matching
    :param postprocess: Optional callable applied to each matched value
    :param cached_values: Optional set or list of known-valid values; cache
        hits bypass the validator and newly validated values are added back
    :param data_kind: Optional label stored as each item's `value_kind`
    :param include_original: Store the raw match as `original` when
        postprocessing changed it
    :param include_contexts: Store surrounding text for each match
    :param context_length: Number of context characters kept on each side
    :return: Yields dicts with `value`, `frequency`, `positions`, `contexts`
        and optionally `original` / `value_kind`
    """

    def create_item(
        value_, position_=None, original_=None, value_kind_=None, context_=None, **kwargs
    ):
        # Build one match item, omitting keys whose value is None.
        res = {"value": value_}
        if position_ is not None:
            res.update({"position": position_})
        if original_ is not None:
            res.update({"original": original_})
        if value_kind_ is not None:
            res.update({"value_kind": value_kind_})
        if context_ is not None:
            res.update({"context": context_})
        if kwargs:
            res.update(kwargs)
        return res

    def add_update_item_to_out(item):
        # Aggregate duplicate values (keyed by SHA-1 of the value): bump the
        # frequency and fold scalar position/context into the plural lists.
        h = hashlib.sha1()
        h.update(convert_to_bytes(item["value"]))
        key_ = h.hexdigest()
        if key_ not in extracted_values:
            extracted_values[key_] = item
        if "frequency" not in extracted_values[key_]:
            extracted_values[key_]["frequency"] = 0
        extracted_values[key_]["frequency"] += 1
        if "positions" not in extracted_values[key_]:
            extracted_values[key_]["positions"] = []
        if item.get("position"):
            extracted_values[key_]["positions"].append(item["position"])
        if "position" in extracted_values[key_]:
            del extracted_values[key_]["position"]
        if "contexts" not in extracted_values[key_]:
            extracted_values[key_]["contexts"] = set()
        if item.get("context"):
            extracted_values[key_]["contexts"].add(item["context"])
        if "context" in extracted_values[key_]:
            del extracted_values[key_]["context"]

    if not isinstance(_input, str):
        try:
            _input = str_from_bytes(_input)
        except Exception:
            # BUGFIX: the previous `yield from ()` did NOT stop the generator;
            # execution fell through and later crashed matching a str pattern
            # against non-str input. A plain `return` ends the generator
            # cleanly with no items.
            return
    if cached_values is None:
        cached_values = set()
    cur_pos = 0
    extracted_values = {}
    for part in _input.splitlines(keepends=True) if per_line else [_input]:
        if preprocess is not None:
            # NOTE(review): positions/contexts assume preprocess preserves
            # offsets relative to `_input` -- TODO confirm
            part = preprocess(part)
        for match in regex.finditer(part):
            value = match.group(0)
            if postprocess is not None:
                value = postprocess(value)
            # Keep the raw match only when postprocessing changed it.
            orig_value = (
                match.group(0) if include_original and match.group(0) != value else None
            )
            context = None
            if include_contexts:
                start = cur_pos + match.start(0)
                end = cur_pos + match.end(0)
                # Up to `context_length` chars on each side of the match.
                context = (
                    _input[max(start - context_length, 0):start]
                    + ">>>>value<<<<"
                    + _input[end:end + context_length]
                )
            if value in cached_values:
                # Known-valid value: record it without re-running the validator.
                add_update_item_to_out(
                    create_item(
                        value,
                        cur_pos + match.start(0),
                        orig_value,
                        value_kind_=data_kind,
                        context_=context,
                    )
                )
                continue
            try:
                if validator is not None and not validator(value):
                    continue
            except Exception:
                # A raising validator counts as a rejection.
                continue
            add_update_item_to_out(
                create_item(
                    value,
                    cur_pos + match.start(0),
                    orig_value,
                    value_kind_=data_kind,
                    context_=context,
                )
            )
            # Remember the validated value in the caller-supplied cache.
            if isinstance(cached_values, list):
                cached_values.append(value)
            if isinstance(cached_values, set):
                cached_values.add(value)
        cur_pos += len(part)
    yield from sorted(
        extracted_values.values(), key=lambda x: x.get("frequency"), reverse=True
    )