def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a Base32 encoded bytes-like object or ASCII string.

    By default the standard chars set is used.
    See https://tools.ietf.org/html/rfc4648#section-6

    :param _input: Base32 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments
    :keyword charset: Alphabet of 32 chars to use for decoding
    :return: `None` if `_input` couldn't be decoded, else decoded byte string
    :raises AssertionError: if `charset` does not consist of exactly 32 chars
    """
    charset = kwargs.get("charset", CHARSETS["std"])
    if len(charset) != 32:
        raise AssertionError("Only full chars set can be defined")

    _input = convert_to_bytes(_input)

    # The alphabet is escaped so that a custom charset containing regex
    # metacharacters ("]", "^", "-", "\\") is still matched literally inside
    # the character class.
    # NOTE(review): CR/LF pass this check but are not removed afterwards, so
    # multi-line input is still rejected by `b32decode` below.
    invalid_chars = convert_to_bytes(r"[^{}=\r\n]".format(re.escape(charset)))
    if re.search(invalid_chars, _input) is not None:
        return None

    if charset != CHARSETS["std"]:
        # Map the custom alphabet onto the standard one so that the stdlib
        # decoder can be used.
        # https://stackoverflow.com/questions/5537750/decode-base64-like-string-with-different-index-tables
        tbl = bytes.maketrans(bytes(charset, "utf8"), bytes(CHARSETS["std"], "utf8"))
        _input = _input.translate(tbl)

    # Pad to a multiple of 8 characters, as required by `b32decode`.
    _input += b"=" * (-len(_input) % 8)

    try:
        return base64.b32decode(_input)
    except Exception:
        # Best-effort decoder: any decoding failure means "not Base32".
        return None
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a Base91 encoded bytes-like object or ASCII string.

    See http://base91.sourceforge.net/

    :param _input: Base91 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments
    :keyword charset: Optional custom alphabet of 91 characters
    :return: `None` if `_input` couldn't be decoded, else decoded bytes string
    :raises AssertionError: if `charset` does not consist of exactly 91 chars
    """
    charset = kwargs.get("charset", CHARSET)
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if len(charset) != 91:
        raise AssertionError("Only full chars set can be defined")

    # Surrounding whitespace is removed for both str and bytes input.
    if isinstance(_input, str):
        _input = _input.strip()
    else:
        try:
            _input = str_from_bytes(_input).strip()
        except Exception:
            return None

    # The alphabet must be escaped: the basE91 alphabet contains "[", "]" and
    # "^", which would otherwise terminate the character class early and
    # disable this validation.
    if re.search("[^" + re.escape(charset) + "]", _input) is not None:
        return None

    if charset != CHARSET:
        # Map the custom alphabet onto the default one before decoding.
        _input = _input.translate(str.maketrans(charset, CHARSET))

    try:
        return convert_to_bytes(base91.decode(_input))
    except Exception:
        return None
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a Base64 encoded bytes-like object or ASCII string.

    By default the standard chars set with the special chars "+/" is used.
    See https://tools.ietf.org/html/rfc4648#section-4

    :param _input: Base64 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments
    :keyword charset: Defines alternative full chars set of 64 chars
    :return: `None` if `_input` couldn't be decoded, else decoded byte string
    :raises AssertionError: if `charset` does not consist of exactly 64 chars
    """
    charset = kwargs.get("charset", CHARSETS_BASE64["std"])
    if len(charset) != 64:
        raise AssertionError(
            "Only full chars set or special chars set can be defined"
        )

    if isinstance(_input, str):
        _input = convert_to_bytes(_input)

    if charset != CHARSETS_BASE64["std"]:
        # Map the custom alphabet onto the standard one.
        # https://stackoverflow.com/questions/5537750/decode-base64-like-string-with-different-index-tables
        tbl = bytes.maketrans(
            convert_to_bytes(charset), convert_to_bytes(CHARSETS_BASE64["std"])
        )
        _input = _input.translate(tbl)

    # From here on the data uses the standard alphabet, so no `altchars` are
    # passed to `b64decode`: re-applying the custom alphabet's last two chars
    # would corrupt data whenever those chars also occur in the standard set.

    # Cheap rejection test on the leading chunk before touching the whole
    # input. Inputs shorter than 64 bytes are padded so they can be validated.
    head = _input[:64]
    head += b"=" * (-len(head) % 4)
    try:
        base64.b64decode(head, validate=True)
    except Exception:
        return None

    # Drop line breaks first, then pad: the padding must be derived from the
    # length of the actual Base64 payload, not from the raw multi-line input.
    payload = b"".join(part.strip() for part in _input.splitlines())
    payload += b"=" * (-len(payload) % 4)
    try:
        return base64.b64decode(payload, validate=True)
    except Exception:
        return None
def __decompress_to_data_list(data):
    """Unpack `data` if it is a known archive or compressed stream.

    The MIME type reported by `guess_mime` selects the decoder. Tar-based
    containers are tried first, then single-stream decompressors, and finally
    brotli is attempted unconditionally.

    :param data: Raw (possibly compressed) data
    :return: Tuple `(contents, label)`; `contents` is a list of byte strings
        and `label` names the detected format. Falsy input gives `([], "")`;
        data that could not be unpacked gives `([data], "")`.
    """
    if not data:
        return [], ""

    data = convert_to_bytes(data)
    mime = guess_mime(data)

    # MIME types that may wrap a tar archive, with the label to report.
    tar_labels = {
        "application/x-tar": "tar",
        "application/gzip": "gzip+tar",
        "application/x-xz": "xz+tar",
        "application/x-bzip2": "bzip2+tar",
    }
    if mime in tar_labels:
        try:
            with tarfile.open(fileobj=io.BytesIO(data)) as tf:
                contents = []
                for member in tf.getmembers():
                    extracted = tf.extractfile(member)
                    # `extractfile` returns None for members without file
                    # content (e.g. directories); skip those instead of
                    # failing the whole archive.
                    if extracted is None:
                        continue
                    contents.append(extracted.read())
                return contents, tar_labels[mime]
        except Exception:
            # Not a (readable) tar archive: fall through to the plain
            # single-stream decompressors below.
            pass

    try:
        if mime == "application/gzip":
            return [gzip.decompress(data)], "gzip"
        if mime in ("application/zip", "application/epub+zip"):
            with zipfile.ZipFile(io.BytesIO(data)) as zf:
                return [zf.read(info) for info in zf.infolist()], "zip"
        if mime == "application/x-brotli":
            return [brotli.decompress(data)], "brotli"
        if mime == "application/x-bzip2":
            return [bz2.decompress(data)], "bzip2"
        if mime == "application/x-xz":
            return [lzma.decompress(data)], "xz"
        if mime in ("application/x-lzip", "application/x-lzma"):
            return [lzma.decompress(data)], "lzma"
    except Exception:
        # Best effort: a failed decompression falls through to the fallback.
        pass

    try:
        # brotli has no standard magic numbers yet, try decompress data anyway
        return [brotli.decompress(data)], "brotli"
    except Exception:
        pass

    return [data], ""
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a Z85 encoded bytes-like object or ASCII string.

    :param _input: Z85 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments (unused)
    :return: `None` if `_input` couldn't be decoded, else decoded byte string
    """
    try:
        _input = convert_to_bytes(_input).strip()
        # "[" and "]" must be escaped inside the character class; unescaped,
        # the first "]" closes the class early and the check never rejects
        # characters outside the alphabet.
        if (
            re.search(
                rb"[^0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.\-:+=^!/*?&<>()\[\]{}@%\$#]",
                _input,
            )
            is not None
        ):
            return None
        return z85.decode(_input)
    except Exception:
        # Best-effort decoder: any failure means "not Z85".
        return None
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode an Ascii85 encoded bytes-like object or ASCII string.

    Input framed by `<~` and `~>` is decoded as Adobe Ascii85; everything
    else is decoded as plain Ascii85 with folded-spaces support enabled.

    :param _input: Ascii85 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments (unused)
    :return: `None` if `_input` is empty or couldn't be decoded, else decoded
        byte string
    """
    if not _input:
        return None

    _input = convert_to_bytes(_input).strip()
    is_adobe = _input[:2] == b"<~" and _input[-2:] == b"~>"

    try:
        if is_adobe:
            return base64.a85decode(_input, adobe=True)
        return base64.a85decode(_input, adobe=False, foldspaces=True)
    except Exception:
        # `except Exception` rather than a bare `except`, so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return None
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a Base58 encoded bytes-like object or ASCII string.

    By default the charset defined for Bitcoin addresses is used.
    Using https://rosettacode.org/wiki/Bitcoin/address_validation#Python

    :param _input: Base58 encoded (bytes) string
    :param kwargs: Arbitrary keyword arguments
    :keyword charset: Alphabet for base58 decoding. Use Bitcoin alphabet by
        default
    :return: Decoded bytes string. Returns `None` if `_input` couldn't be
        decoded.
    :raises AssertionError: if `charset` does not consist of exactly 58 chars
    """
    charset = kwargs.get("charset", CHARSETS["bitcoin"])
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if len(charset) != 58:
        raise AssertionError("Only full chars set can be defined")

    try:
        return base58.b58decode(_input, alphabet=convert_to_bytes(charset))
    except Exception:
        # Best-effort decoder: any failure means "not Base58".
        return None
def run(cls, _input: Decodable, **kwargs) -> Optional[bytes]:
    """Decode a quoted-printable encoded bytes-like object or string.

    :param _input: String or bytes
    :param kwargs: Arbitrary keyword arguments (unused)
    :return: Bytes string if decoded successfully, else None
    """
    try:
        _input = convert_to_bytes(_input)
    except Exception:
        # `except Exception` rather than a bare `except`, so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return None

    # Reject anything that is not printable ASCII or whitespace.
    if re.search(rb"[^ -~\s]", _input):
        return None
    # Require at least one "=XX" escape (hex digits in uniform case);
    # without one there is nothing quoted-printable to decode.
    if re.search(rb"=(?:[0-9a-f]{2}|[0-9A-F]{2})", _input) is None:
        return None

    try:
        return quopri.decodestring(_input)
    except Exception:
        return None
def add_update_item_to_out(item):
    """Merge `item` into `extracted_values`, keyed by the SHA-1 of its value.

    The first item seen for a value is stored as the entry itself. Each call
    increments the entry's "frequency", collects the item's "position" into
    the "positions" list and its "context" into the "contexts" set, and
    removes the singular "position"/"context" keys from the stored entry.

    :param item: Dict with a "value" key and optional "position" and
        "context" keys
    """
    digest = hashlib.sha1(convert_to_bytes(item["value"])).hexdigest()

    if digest not in extracted_values:
        extracted_values[digest] = item
    entry = extracted_values[digest]

    entry["frequency"] = entry.get("frequency", 0) + 1

    # NOTE(review): a falsy position (e.g. 0) is not recorded - confirm that
    # this is intended.
    positions = entry.setdefault("positions", [])
    position = item.get("position")
    if position:
        positions.append(position)
    entry.pop("position", None)

    contexts = entry.setdefault("contexts", set())
    context = item.get("context")
    if context:
        contexts.add(context)
    entry.pop("context", None)