def parse_args(args):
    """Copy the parsed command-line options into RUNTIME_CONFIG.

    Args:
        args: parsed options object. The attributes read here are `style`,
            `length`, `digit`, `letter`, `lower`, `upper`, `wordlist_addr`,
            `rulelist_addr`, `pwlist_addr` and `debug`.
    """
    # Running style: any JtR nickname selects JtR, everything else HC.
    if args.style in john_nick_names:
        RUNTIME_CONFIG.reset_to_jtr()
    else:
        RUNTIME_CONFIG.reset_to_hc()

    # Password policy.
    policy = PasswordPolicyConf(args.length, args.digit, args.letter,
                                args.lower, args.upper)
    RUNTIME_CONFIG['password_policy'] = policy

    # Input paths, stored in the same order as before.
    path_options = (
        ('wordlist_path', 'wordlist_addr'),
        ('rulelist_path', 'rulelist_addr'),
        ('pwlist_path', 'pwlist_addr'),
    )
    for config_key, option_name in path_options:
        RUNTIME_CONFIG[config_key] = FilePath(getattr(args, option_name))

    # NOTE: handling of an `enable_regex` flag was commented out in this
    # function and remains disabled.

    if args.debug == True:
        RUNTIME_CONFIG['debug'] = True
        print("Enabling Extra Debug Information\n")

    # Warn when a non-empty policy is combined with HC mode.
    if RUNTIME_CONFIG.is_hc():
        if policy.to_compact_string() != "":
            print(
                "Warning: Enabling Password Policy Is Slow in HC Mode and Only For Demo Purpose, Should Be Disabled in Real Running\n"
            )
def has_count_data():
    """Check whether count data already exists for the current configuration.

    The md5 digests of the configured wordlist and rulelist files, the
    password policy debug string and a running-style flag ("1" for JtR,
    "0" otherwise) are compared with the first four lines of
    `<preprocess_path>/count_hashes.txt`.

    Returns:
        True if the hash file exists and all four entries match, otherwise
        False.
    """
    # Compute hashes; the files are closed as soon as they have been read.
    with open(RUNTIME_CONFIG['wordlist_path']['addr'], 'rb') as wordlist_file:
        wordlist_hash = hashlib.md5(wordlist_file.read()).hexdigest()
    with open(RUNTIME_CONFIG['rulelist_path']['addr'], 'rb') as rulelist_file:
        rulelist_hash = hashlib.md5(rulelist_file.read()).hexdigest()

    password_policy_string = RUNTIME_CONFIG['password_policy'].to_debug_string()
    type_j = "1" if RUNTIME_CONFIG.is_jtr() else "0"

    hash_file_addr = "{}/count_hashes.txt".format(
        RUNTIME_CONFIG['preprocess_path'])
    if not os.path.exists(hash_file_addr):
        return False

    with open(hash_file_addr) as f:
        content = f.readlines()

    # The policy string and style flag are only stripped of line endings, so
    # significant leading/trailing spaces in them are preserved.
    return (len(content) >= 4
            and content[0].strip() == wordlist_hash
            and content[1].strip() == rulelist_hash
            and content[2].strip("\r\n") == password_policy_string
            and content[3].strip("\r\n") == type_j)
def forward_batched_words_and_batched_rules(
        wordlist_name, number_of_rule_batches, count_for_this_batch_of_words,
        preprocess_path, external_bash_process):
    """Run every rule batch against one word batch and accumulate counts.

    HC only. For each rule batch a command line is written to the stdin of
    `external_bash_process`; one line is then read back from its stdout and
    parsed as the integer count for that batch.

    Args:
        wordlist_name: file name of the word batch inside `preprocess_path`.
        number_of_rule_batches: number of `rulesbatch<i>.rule` files to run.
        count_for_this_batch_of_words: indexable accumulator; entry
            `rule_batch_id` is incremented in place.
        preprocess_path: directory holding the word and rule batch files.
        external_bash_process: process object exposing binary `stdin` and
            `stdout` pipes.

    Raises:
        Exception: if the running style is JtR, or if the line read back
            cannot be parsed as an integer.
    """
    if RUNTIME_CONFIG.is_jtr():
        raise Exception("Not Intended For JtR")

    for rule_batch_id in range(number_of_rule_batches):
        cmd = RUNTIME_CONFIG[
            'executable_path'] + " {}/{} -r {}/rulesbatch{}.rule --stdout {} --no_filter_input --count_only\n".format(
                preprocess_path, wordlist_name, preprocess_path,
                rule_batch_id,
                RUNTIME_CONFIG['password_policy'].to_arg_string())

        external_bash_process.stdin.write(bytes(cmd, 'utf-8'))
        external_bash_process.stdin.flush()

        line = external_bash_process.stdout.readline().decode()
        try:
            number = int(line.strip())
        except ValueError as err:
            # Only a malformed number is expected here; a bare `except`
            # would also trap KeyboardInterrupt/SystemExit.
            raise Exception("Parsing Number Error {}".format(line)) from err

        count_for_this_batch_of_words[rule_batch_id] += number
def read_wordlist(wordlist_name,
                  wordlist_dir="../data/wordlists/",
                  remove_hash=True):
    """ Read word into dict.

    Args:
        wordlist_name: the filename of the wordlist.

        wordlist_dir: the directory the wordlist is in.

        remove_hash: Whether to remove lines that start with "#!comment:".

    Returns:
        Dictionary mapping each kept word to its index. Indexing starts
        from 0. A plain dict is used in JtR mode, an OrderedDict otherwise.
    """
    wordlist_addr = "{}/{}".format(wordlist_dir, wordlist_name)
    wordlist = {} if RUNTIME_CONFIG.is_jtr() else OrderedDict()

    with open(wordlist_addr) as f:
        for line in f:
            line = clean_word(line)

            if len(line) > RUNTIME_CONFIG['max_password_length']:
                print(
                    "Oversize Word: {} in wordlist file, ignored".format(line))
                # Actually skip the word: previously it was reported as
                # ignored but still added to the wordlist.
                continue

            if remove_hash and line.startswith('#!comment:'):
                print(
                    "Comments: {} found in wordlist file, ignored".format(line))
                # Same fix as above: honour `remove_hash` and the message.
                continue

            if line == "" and RUNTIME_CONFIG.is_jtr():
                # JtR doesn't take in empty line.
                continue

            if line not in wordlist:
                wordlist[line] = len(wordlist)
            elif line != "":
                print("Duplicate Word: {} in wordlist file, ignored".format(
                    line))

    return wordlist
def write_rules_to_file(out_addr, rules, batch_id):
    """ write a batch of rules to file, HC only

    Each rule's `raw` text is written on its own line. The password policy
    is not embedded: HC uses external checking for password policy.

    Args:
        out_addr: path of the file to write

        rules: a list of rules (objects exposing `.raw`)

        batch_id: idx of the batch, starting from 0. Should not be used

    Raises:
        Exception: if the running style is JtR.
    """
    # Check the running style before opening the file, so that an
    # unsupported call no longer truncates an existing file at `out_addr`.
    if RUNTIME_CONFIG.is_jtr():
        raise Exception("Not Intended For JtR")

    with open(out_addr, 'w+') as f:
        for r in rules:
            f.write("{}\n".format(r.raw))
def main():
    """Entry point: read the options, configure the run, then process.

    Any exception raised while parsing the options propagates unchanged.
    """
    options = setup_args()
    parse_args(options)

    banner = "Your Running Configuration: {}\n".format(
        RUNTIME_CONFIG.short_config_string())
    print(banner)

    start_processing()
def write_rule_to_file(out_addr, rule, rule_idx):
    """Write a single rule to `out_addr` in JtR or HC format.

    In JtR mode the file gets a `[List.Rules:rule<idx>]` section header
    followed by the rule text with the password-policy string appended.
    In HC mode only the raw rule text is written.

    Args:
        out_addr: path of the file to (over)write.
        rule: the parsed rule; its `raw` attribute is the text written.
        rule_idx: idx of the rule, starting from 0; used in the JtR header.
    """
    policy_suffix = RUNTIME_CONFIG['password_policy'].to_rule_string(
        RUNTIME_CONFIG.is_jtr())

    with open(out_addr, 'w+') as rule_file:
        if not RUNTIME_CONFIG.is_jtr():
            # HC use external checking for password policy
            rule_file.write("{}\n".format(rule.raw))
            return

        # JTR embeds password policy in rules
        section_header = "[List.Rules:rule{}]\n".format(rule_idx)
        rule_file.write(section_header)
        rule_line = "{}{}\n".format(rule.raw, policy_suffix)
        rule_file.write(rule_line)
def create_character_classes_dict():
    r""" Create class_dict, which follows JtR's definition. Also used in HC.

    ?? matches "?"
    ?v matches vowels: "aeiouAEIOU"
    ?c matches consonants: "bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ"
    ?w matches whitespace: space and horizontal tabulation characters
    ?p matches punctuation: ".,:;'?!`" and the double quote character
    ?s matches symbols "$%^&*()-_+=|\<>[]{}#@/~"
    ?l matches lowercase letters [a-z]
    ?u matches uppercase letters [A-Z]
    ?d matches digits [0-9]
    ?a matches letters [a-zA-Z]
    ?x matches letters and digits [a-zA-Z0-9]
    ?z matches all characters

    Each uppercase key (V, C, W, P, S, L, U, D, A, X, Z) is the complement
    of its lowercase class within PRINTABLES.

    Returns:
        dict mapping each class letter to a set of characters.
    """
    # The docstring is raw because it contains a literal backslash ("\<"),
    # which is an invalid escape sequence in a normal string literal.

    # "all characters" depends on the running style: 256 code points for HC,
    # 32..126 otherwise.
    if RUNTIME_CONFIG.is_hc():
        all_chars = set(chr(x) for x in range(256))
    else:
        all_chars = set(chr(x) for x in range(32, 127))

    # built dict
    d = {
        '?': '?',
        'v': CHARS_VOWELS,
        'c': CHARS_CONSONANTS,
        'w': WHITESPACE,
        'p': CHARS_PUNCTUATION,
        's': CHARS_SPECIALS,
        'l': CHARS_LOWER,
        'u': CHARS_UPPER,
        'd': CHARS_DIGITS,
        'a': CHARS_LETTERS,
        'x': CHARS_LETTERS + CHARS_DIGITS,
        'z': all_chars,
    }

    for key in "VCWPSLUDAXZ":
        lowercase_class = d[key.lower()]
        d[key] = "".join(i for i in PRINTABLES if i not in lowercase_class)

    return {k: set(v) for k, v in d.items()}
def forward_a_rule_to_an_address(wordlist_addr,
                                 rule,
                                 out_addr,
                                 word_list_prefix="../data/wordlists",
                                 debug=False):
    """Enumerate the guesses of one rule over a wordlist into `out_addr`.

    The rule is written to the temporary file `tmp_rule_generate.lst` in the
    current directory, the configured executable is run with its stdout
    redirected to `out_addr`, and the temporary file is removed afterwards
    unless `debug` is set.

    Args:
        wordlist_addr: wordlist file name, relative to `word_list_prefix`.
        rule: the parsed rule.
        out_addr: output file address; an existing file is removed first.
        word_list_prefix: wordlist directory.
        debug: debug mode, keep the temporary rule file.
    """
    rule_no = 0  # tmp use

    # Start from a clean output file.
    if os.path.exists(out_addr):
        os.remove(out_addr)

    # Prepare the rule/config file consumed by the executable.
    write_rule_to_file("tmp_rule_generate.lst", rule, rule_no)

    executable = RUNTIME_CONFIG['executable_path']
    if RUNTIME_CONFIG.is_jtr():
        # Call JtR to Forward
        arguments = ' --config=tmp_rule_generate.lst --stdout --wordlist="{}/{}" --rules="rule{}"'.format(
            word_list_prefix, wordlist_addr, rule_no)
    else:
        # Call HC to Forward
        arguments = ' {}/{} -r tmp_rule_generate.lst --stdout {}'.format(
            word_list_prefix, wordlist_addr,
            RUNTIME_CONFIG['password_policy'].to_arg_string())
    cmd = executable + arguments

    # `subprocess.call` blocks, so the file can be closed right after it.
    with open(out_addr, 'w+') as sink:
        if platform == "win32":
            subprocess.call(cmd, stdout=sink)
        else:
            subprocess.call(
                cmd, shell=True, stdout=sink, executable='/bin/bash')

    # Clean Workspace
    if debug == False and os.path.exists("tmp_rule_generate.lst"):
        os.remove("tmp_rule_generate.lst")
def store_generated_data_hash():
    """Store hashes for one configuration.

    Writes four lines to `<preprocess_path>/hashes.txt`: the md5 digest of
    the wordlist file, the md5 digest of the rulelist file, the password
    policy debug string, and "1" for JtR or "0" otherwise.
    """
    # NOTE(review): this writes `hashes.txt`, whereas `has_count_data` reads
    # `count_hashes.txt`. Presumably a different reader consumes this file;
    # confirm.

    # Compute hashes; context managers make sure the inputs are closed.
    with open(RUNTIME_CONFIG['wordlist_path']['addr'], 'rb') as wordlist_file:
        wordlist_hash = hashlib.md5(wordlist_file.read()).hexdigest()
    with open(RUNTIME_CONFIG['rulelist_path']['addr'], 'rb') as rulelist_file:
        rulelist_hash = hashlib.md5(rulelist_file.read()).hexdigest()

    password_policy_string = RUNTIME_CONFIG['password_policy'].to_debug_string()
    type_j = "1" if RUNTIME_CONFIG.is_jtr() else "0"

    hash_file_addr = "{}/hashes.txt".format(RUNTIME_CONFIG['preprocess_path'])
    # The file is closed even if one of the writes fails.
    with open(hash_file_addr, 'w') as f:
        f.write(wordlist_hash + "\n")  # python will convert \n to os.linesep
        f.write(rulelist_hash + "\n")  # python will convert \n to os.linesep
        f.write(password_policy_string + "\n")
        f.write(type_j + "\n")
def estimate_guess_number(counts, cumsum, word, rule_idx, wordlist):
    """Estimate the guess number at which `rule_idx` applied to `word` occurs.

    Args:
        counts: per-rule counts (JtR) or a table indexed by
            [word batch, rule batch] (HC).
        cumsum: cumulative sums the lower/upper bounds are read from.
        word: the word; must be a key of `wordlist`.
        rule_idx: index of the rule.
        wordlist: mapping from word to its index.

    Returns:
        Tuple `(estimated, lower_bound, upper_bound)`.
    """
    # NOTE(review): when the computed index is 0, `cumsum[index - 1]` reads
    # `cumsum[-1]`; presumably `cumsum` is laid out so that this is the
    # intended lower bound -- confirm against the code that builds it.
    word_pos = wordlist[word]

    if RUNTIME_CONFIG.is_jtr():
        # Bounds of this rule, then interpolate by the word's position.
        lower_bound = cumsum[rule_idx - 1]
        upper_bound = cumsum[rule_idx]
        word_fraction = (word_pos + 1) * 1.0 / len(wordlist)
        estimated = lower_bound + int(counts[rule_idx] * word_fraction)
        return estimated, lower_bound, upper_bound

    # HC: locate the (word batch, rule batch) cell first.
    word_batch = word_pos // RUNTIME_CONFIG['batch_size_of_words']
    rule_batch = rule_idx // RUNTIME_CONFIG['batch_size_of_rules']

    rule_batch_sizes = RUNTIME_CONFIG['number_of_rules_in_each_batch']
    cell_index = word_batch * len(rule_batch_sizes) + rule_batch
    lower_bound = cumsum[cell_index - 1]
    upper_bound = cumsum[cell_index]

    cell_count = counts[word_batch, rule_batch]
    words_in_batch = RUNTIME_CONFIG['number_of_words_in_each_batch'][
        word_batch]

    # Part 1: share of the cell taken by the words preceding this one.
    word_offset = word_pos % RUNTIME_CONFIG['batch_size_of_words']
    from_words = int(cell_count * word_offset / words_in_batch)

    # Part 2: position of the rule within its batch, for a single word.
    rule_offset = (rule_idx % RUNTIME_CONFIG['batch_size_of_rules']) + 1
    from_rules = int(cell_count * rule_offset / words_in_batch /
                     rule_batch_sizes[rule_batch])

    estimated = lower_bound + from_words + from_rules
    return estimated, lower_bound, upper_bound
def _log_guesses(pw_idx, pwd, rule, rule_idx, words, is_guessable, counts,
                 cumsum, wordlist):
    """Mark a password guessable and log one entry per word producing it."""
    if len(words) == 0:
        return
    is_guessable[pw_idx] = True
    for word in words:
        logging.info(
            "\nPasswordIdx:{}\nPassword:{}\nRule:{}\nWord:{}\nGuess:{} ( {} - {} )\n"
            .format(pw_idx, pwd, rule.raw, word,
                    *estimate_guess_number(counts, cumsum, word, rule_idx,
                                           wordlist)))


def _report_inversion_error(rule, pwd, error_msg):
    """Log and print a failed inversion of `pwd` under `rule`."""
    logging.info("Inversion error for {}(RL) {}(pw), error msg: {}\n".format(
        rule.raw, pwd, error_msg))
    print("Inversion error for {}(RL) {}(pw), error msg: {}".format(
        rule.raw, pwd, error_msg))


def start_processing():
    """ Take in a wordlist, rulelist and test set, outputs the guessability and guess number of each pwd in the test set.

    Steps:
        1. read rulelist and do precomputation (detect invertibility)
        2. read wordlist/pwlist, and get count for each rule
        3. Rule Inversion (for each rule, invert all pwds)

    Results are written through `logging` to the file returned by
    `RUNTIME_CONFIG.get_log_addr()`; progress messages go to stdout.
    """
    # One bash process is shared by every lookup in enumerated data. The
    # `with` block closes its pipes and waits for it when processing ends,
    # including on error, instead of leaving the process behind.
    with Popen(['/bin/bash'], stdin=PIPE, stdout=PIPE) as external_bash_process:
        ##################### Precomputation and Other Preparation #####################
        # Logging Basic Info
        logging.basicConfig(filename=RUNTIME_CONFIG.get_log_addr(),
                            level=logging.DEBUG)
        logging.info("Starting Time: {}\n\nConfigurations: {}\n".format(
            time.strftime("%Y-%m-%d %H:%M"),
            RUNTIME_CONFIG.short_config_string()))
        logging.info("PasswordPolicy: {}\n".format(
            RUNTIME_CONFIG['password_policy'].to_debug_string()))

        print("Reading Rulelist\n")
        rulelist = read_rulelist(RUNTIME_CONFIG['rulelist_path']['name'],
                                 RUNTIME_CONFIG['rulelist_path']['prefix'])

        print("Start Precomputation\n")
        rulelist = precomputation(rulelist)

        print("Reading Wordlist and Password Set\n")
        wordlist = read_wordlist(RUNTIME_CONFIG['wordlist_path']['name'],
                                 RUNTIME_CONFIG['wordlist_path']['prefix'])

        # Computing Guess Count
        counts, cumsum = GuessCount.get_counts(
            wordlist, rulelist, RUNTIME_CONFIG['preprocess_path'])

        # read other things
        pwlist = read_passwords(RUNTIME_CONFIG['pwlist_path']['addr'])
        # filter out pwds not consistent with the policy
        not_filtered_pwds, filtered_pwds = filter_passwords_with_password_policy(
            pwlist)

        trie = build_trie_from_wordlist(wordlist)

        ##################### Start Inversion #####################
        print("Start Inverting Rules\n")
        i_time = time.perf_counter()

        # guessability of pwds, indexed by pw_idx
        is_guessable = [False] * len(pwlist)

        is_enable_regex = RUNTIME_CONFIG['enable_regex']
        is_debug = RUNTIME_CONFIG['debug']
        lookup_threshold = RUNTIME_CONFIG['lookup_threshold']

        # tokenize pwds once.
        tokenized_pwds = [TokenString(pwd) for pw_idx, pwd in not_filtered_pwds]

        for r_idx, r in enumerate(rulelist):
            if is_debug:
                print(r.raw)

            # where the enumerated guesses of this rule are stored
            enumerated_data_addr = "{}/enumerated/rule{}.txt".format(
                RUNTIME_CONFIG['preprocess_path'], r_idx)

            if r.feasibility.is_invertible():
                # invertible, if blow up, use trie
                for token_pwd, (pw_idx, pwd) in zip(tokenized_pwds,
                                                    not_filtered_pwds):
                    result = invert_one_rule(token_pwd, r, is_enable_regex,
                                             r.feasibility.special_idx)
                    if result.is_normal():
                        if result.get_number_of_strings() <= lookup_threshold:
                            ret_vals = match_inversion_result(result, wordlist)
                        else:
                            ret_vals = search_trie(result, trie)
                        _log_guesses(pw_idx, pwd, r, r_idx, ret_vals,
                                     is_guessable, counts, cumsum, wordlist)
                    elif result.is_out_of_scope():
                        _report_inversion_error(r, pwd, "out_of_scope")
                    else:
                        _report_inversion_error(r, pwd, result.error_msg)

            elif r.feasibility.is_optimizable():
                # uninvertible, if cannot handle, fall back to enumerated data
                for token_pwd, (pw_idx, pwd) in zip(tokenized_pwds,
                                                    not_filtered_pwds):
                    result = invert_one_rule(token_pwd, r, is_enable_regex)
                    if result.is_normal():
                        if result.get_number_of_strings() <= lookup_threshold:
                            ret_vals = match_inversion_result(result, wordlist)
                        else:
                            ret_vals = search_exist_data(
                                pwd, enumerated_data_addr,
                                external_bash_process)
                        _log_guesses(pw_idx, pwd, r, r_idx, ret_vals,
                                     is_guessable, counts, cumsum, wordlist)
                    elif result.is_out_of_scope():
                        ret_vals = search_exist_data(
                            pwd, enumerated_data_addr, external_bash_process)
                        _log_guesses(pw_idx, pwd, r, r_idx, ret_vals,
                                     is_guessable, counts, cumsum, wordlist)
                    else:
                        _report_inversion_error(r, pwd, result.error_msg)

            else:
                # neither invertible nor optimizable: search enumerated data
                for pw_idx, pwd in not_filtered_pwds:
                    ret_vals = search_exist_data(pwd, enumerated_data_addr,
                                                 external_bash_process)
                    _log_guesses(pw_idx, pwd, r, r_idx, ret_vals,
                                 is_guessable, counts, cumsum, wordlist)

        ##################### End of Inversion #####################
        # Write Not Guessable Data
        for pw_idx, pwd in filtered_pwds:
            logging.info("\nPasswordIdx:{}\nPassword:{}\nNot Guessable\n".format(
                pw_idx, pwd))

        # `is_guessable` is indexed by pw_idx, not by position in
        # `not_filtered_pwds`; zipping the two misreports passwords as soon
        # as any password has been filtered out.
        for pw_idx, pwd in not_filtered_pwds:
            if not is_guessable[pw_idx]:
                logging.info(
                    "\nPasswordIdx:{}\nPassword:{}\nNot Guessable\n".format(
                        pw_idx, pwd))

        logging.info("Total guesses made by this configuration: {}\n".format(
            np.sum(counts)))

        print("Finished Inverting Rules, Total Time: {}".format(
            time.perf_counter() - i_time))
# First token -> name, independent of the running style.
_RULE_NAME_FIXED = {
    ":": "colon",
    "{": "left_curly_bracket",
    "}": "right_curly_bracket",
    "[": "left_square_bracket",
    "]": "right_square_bracket",
    ".": "period_N",
    ",": "comma_N",
    "$": "dollar_X",
    "^": "caret_X",
    "A": "A_N_str",
    "*": "asterisk_N_M",
    "X": "X_N_M_I",
    "v": "v_V_N_M",
    ">": "greater_than_N",
    "<": "less_than_N",
    "_": "underscore_N",
}

# First token -> (name in HC mode, name otherwise).
_RULE_NAME_HC_OR_OTHER = {
    "+": ("plus_N", "mode"),
    "-": ("minus_N", "flag"),
}

# First token -> (index to test for "?", name when JTR and "?" found,
# name otherwise).
_RULE_NAME_CHAR_CLASS = {
    "!": (1, "bang_question_C", "bang_X"),
    "/": (1, "slash_question_C", "slash_X"),
    "=": (2, "equal_N_question_C", "equal_N_X"),
    "(": (1, "left_paren_question_C", "left_paren_X"),
    ")": (1, "right_paren_question_C", "right_paren_X"),
    "%": (2, "percent_N_question_C", "percent_N_X"),
    "@": (1, "at_question_C", "at_X"),
    "e": (1, "e_question_C", "e_X"),
    "s": (1, "s_question_C_Y", "s_X_Y"),
}


def get_name_of_a_rule(transformation):
    """Return the name (string) of a transformation.

    The name is derived from the first element of `transformation`; for
    some commands it also depends on the running style and, in JTR mode, on
    whether a later element is "?".

    Raises:
        Exception: if the first element matches no known command.
    """
    head = transformation[0]

    if head in _RULE_NAME_FIXED:
        return _RULE_NAME_FIXED[head]

    if head in _RULE_NAME_HC_OR_OTHER:
        hc_name, other_name = _RULE_NAME_HC_OR_OTHER[head]
        return hc_name if RUNTIME_CONFIG.is_hc() else other_name

    if head in _RULE_NAME_CHAR_CLASS:
        position, class_name, plain_name = _RULE_NAME_CHAR_CLASS[head]
        # The later element is only inspected in JTR mode.
        if (RUNTIME_CONFIG['running_style'] == RunningStyle.JTR
                and transformation[position] == "?"):
            return class_name
        return plain_name

    # Substring membership tests, kept in their original relative order.
    if head in "lucCtrdfqkKEPISVMQ46":
        return head
    if head in "pRL":
        if RUNTIME_CONFIG.is_jtr():  # JTR
            return head
        return "{}_N".format(head)  # ELSE HASHCAT, pN, LN, RN
    if head in "'":
        return "prime_N"
    if head in "TDzZyY":
        return "{}_N".format(head)
    if head in "io":
        return "{}_N_X".format(head)
    if head in "xO":
        return "{}_N_M".format(head)
    if head in "12":
        return "mode"

    raise Exception(
        "Not implemented for transformation: {}".format(transformation))
def forward_a_rule_to_an_address_and_forward_count(
        wordlist_addr,
        rule,
        out_prefix,
        rule_idx,
        word_list_prefix="../data/wordlists",
        debug=False):
    """Enumerate one rule over a wordlist, sort the output and count it.

    Files produced under `out_prefix`:
        enumerated/rule<rule_idx>.txt: the guesses, sorted with
            `LC_ALL=C sort`.
        count/rule<rule_idx>.txt: output of `wc -l` on the sorted file.

    `tmp_1.txt` under `out_prefix` and `tmp_rule_generate.lst` in the current
    directory are used as scratch files and removed unless `debug` is set.

    Args:
        wordlist_addr: wordlist file name, relative to `word_list_prefix`.
        rule: the parsed rule.
        out_prefix: directory holding the `enumerated` and `count` folders.
        rule_idx: index of the rule, used in the output file names.
        word_list_prefix: wordlist directory.
        debug: debug mode, keep the scratch files.
    """
    rule_no = 0  # tmp use

    out_addr = "{}/enumerated/rule{}.txt".format(out_prefix, rule_idx)
    count_addr = "{}/count/rule{}.txt".format(out_prefix, rule_idx)
    tmp_1_file_addr = "{}/tmp_1.txt".format(out_prefix)

    # Remove leftovers of a previous run.
    for stale_file in (out_addr, count_addr, tmp_1_file_addr):
        if os.path.exists(stale_file):
            os.remove(stale_file)

    # Prepare the rule/config file consumed by the executable.
    write_rule_to_file("tmp_rule_generate.lst", rule, rule_no)

    executable = RUNTIME_CONFIG['executable_path']
    if RUNTIME_CONFIG.is_jtr():
        # Call JtR to Forward
        arguments = ' --config=tmp_rule_generate.lst --stdout --wordlist="{}/{}" --rules="rule{}"'.format(
            word_list_prefix, wordlist_addr, rule_no)
    else:
        # Call HC to Forward
        arguments = ' {}/{} -r tmp_rule_generate.lst --stdout {}'.format(
            word_list_prefix, wordlist_addr,
            RUNTIME_CONFIG['password_policy'].to_arg_string())
    cmd = executable + arguments

    on_windows = platform == "win32"

    # `subprocess.call` blocks, so each file can be closed right after it.
    with open(tmp_1_file_addr, 'w+') as raw_sink:
        if on_windows:
            subprocess.call(cmd, stdout=raw_sink)
        else:
            subprocess.call(
                cmd, shell=True, stdout=raw_sink, executable='/bin/bash')

    # Sort File (always run through bash, as before)
    sort_cmd = "LC_ALL=C sort " + tmp_1_file_addr
    with open(out_addr, 'w+') as sorted_sink:
        subprocess.call(
            sort_cmd, shell=True, stdout=sorted_sink, executable='/bin/bash')

    # Count the sorted lines.
    cmd_count = 'wc -l < {}'.format(out_addr)
    with open(count_addr, "w+") as count_sink:
        if on_windows:
            subprocess.call(cmd_count, stdout=count_sink)
        else:
            subprocess.call(
                cmd_count, shell=True, stdout=count_sink,
                executable='/bin/bash')

    # Clean Workspace
    if debug == False:
        for scratch_file in ("tmp_rule_generate.lst", tmp_1_file_addr):
            if os.path.exists(scratch_file):
                os.remove(scratch_file)
def switch_to_jtr(self):
    """Switch the global context to JTR.

    Resets RUNTIME_CONFIG to JtR and redefines the `z` character class as
    the characters with code points 32..126.
    """
    RUNTIME_CONFIG.reset_to_jtr()
    jtr_all_chars = {chr(code) for code in range(32, 127)}
    Dicts.classes['z'] = jtr_all_chars
def switch_to_hc(self):
    """Switch the global context to HC.

    Resets RUNTIME_CONFIG to HC and redefines the `z` character class as
    the characters with code points 0..255.
    """
    RUNTIME_CONFIG.reset_to_hc()
    hc_all_chars = {chr(code) for code in range(256)}
    Dicts.classes['z'] = hc_all_chars