def buildpcfg(passwd_dictionary, start=0, end=-1):
    """Train a PCFG rule set from a password-frequency dictionary.

    Each input line is expected to look like ``<count> <password>``.  Lines
    that do not start with a numeric count followed by at least one more
    token are skipped, as are passwords that are not plain ASCII.  Reading
    stops early when a line's count is below ``MIN_COUNT`` (presumably the
    dictionary is sorted by decreasing count -- TODO confirm) or when
    ``check_resource`` returns a falsy value.

    Args:
        passwd_dictionary: value handed to ``open_`` to obtain the lines.
        start: 0-based index of the first line to process.
        end: 0-based index of the last line to process (inclusive).  A
            negative value (the default) means "no upper bound".

    Returns:
        The populated ``RuleSet`` when ``end > 0``.  Otherwise the rule set
        is saved to ``GRAMMAR_DIR + '/grammar.cfg.bz2'`` and ``None`` is
        returned.
    """
    rules = RuleSet()
    # Line index after which check_resource() is consulted again.
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n < start:
            continue
        # A negative `end` means unbounded.  The plain `n > end` test used
        # before was already true on line 0 for the default end=-1, so a
        # default call processed nothing and saved an empty grammar.
        if 0 <= end < n:
            break
        if n > resource_tracker:
            extra = check_resource(n)
            if not extra:
                break
            resource_tracker += extra
        fields = line.strip().split()
        # Only "<count> <password...>" lines are usable.
        if len(fields) < 2 or not fields[0].isdigit():
            continue
        w, c = ' '.join(fields[1:]), int(fields[0])
        try:
            w.decode('ascii')
        except UnicodeDecodeError:
            continue  # not ASCII, skip this password
        if c < MIN_COUNT:
            message = "Word frequency dropped to %d for %s" % (c, w)
            # A single-argument print() call emits the same text under the
            # Python 2 print statement and is also valid Python 3 syntax.
            print("%s %s" % (message, n))
            break  # Careful: everything after this line is ignored.
        T = parse(w)
        rules.update_set(T.rule_set(), with_freq=True, freq=c)
    if end > 0:
        return rules
    rules.save(bz2.BZ2File(GRAMMAR_DIR + '/grammar.cfg.bz2', 'wb'))
def buildOurpcfg(filename):
    """Train a PCFG rule set from a password file and archive it.

    Each non-blank line of ``filename`` holds a password followed by a
    space and its occurrence count.  Every password is run through
    ``parse`` and the resulting rules are added to a ``RuleSet`` weighted by
    that count.  The rule set is then saved through a ``bz2.BZ2File`` opened
    on ``temp1.cfg``.

    Args:
        filename: path of the training-set file.

    Raises:
        ValueError: if a non-blank line has no space-separated count, or
            the count is not an integer.
    """
    # Container for the rules and their frequencies.
    rule_set = RuleSet()
    # Upper bound on the line index that will be read.
    max_line = 2555000
    # `with` guarantees the training file is closed, even if parsing fails.
    with open(filename, 'r') as fp:
        for i, line in enumerate(fp):
            if i > max_line:
                break
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            # The password comes first and the count last; splitting on the
            # final space keeps passwords that themselves contain spaces.
            password, n = record.rsplit(' ', 1)
            p = parse(password)
            # Register the rules with the given frequency.
            rule_set.update_set(p.rule_set(), with_freq=True, freq=int(n))
    # Training finished: archive the result.
    rule_set.save(bz2.BZ2File("temp1.cfg", "wb"))
def buildpcfg(passwd_dictionary, start=0, end=-1):
    """Train a PCFG rule set from a password-frequency dictionary.

    Each input line is expected to look like ``<count> <password>``.  Lines
    that do not start with a numeric count followed by at least one more
    token are skipped, as are passwords that are not plain ASCII.  Reading
    stops early when a line's count is below ``MIN_COUNT`` (presumably the
    dictionary is sorted by decreasing count -- TODO confirm) or when
    ``check_resource`` returns a falsy value.

    Args:
        passwd_dictionary: value handed to ``open_`` to obtain the lines.
        start: 0-based index of the first line to process.
        end: 0-based index of the last line to process (inclusive).  A
            negative value (the default) means "no upper bound".

    Returns:
        The populated ``RuleSet`` when ``end > 0``.  Otherwise the rule set
        is saved to ``GRAMMAR_DIR + '/grammar.cfg.bz2'`` and ``None`` is
        returned.
    """
    rules = RuleSet()
    # Line index after which check_resource() is consulted again.
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n < start:
            continue
        # A negative `end` means unbounded.  The plain `n > end` test used
        # before was already true on line 0 for the default end=-1, so a
        # default call processed nothing and saved an empty grammar.
        if 0 <= end < n:
            break
        if n > resource_tracker:
            extra = check_resource(n)
            if not extra:
                break
            resource_tracker += extra
        fields = line.strip().split()
        # Only "<count> <password...>" lines are usable.
        if len(fields) < 2 or not fields[0].isdigit():
            continue
        w, c = ' '.join(fields[1:]), int(fields[0])
        try:
            w.decode('ascii')
        except UnicodeDecodeError:
            continue  # not ASCII, skip this password
        if c < MIN_COUNT:
            message = "Word frequency dropped to %d for %s" % (c, w)
            # A single-argument print() call emits the same text under the
            # Python 2 print statement and is also valid Python 3 syntax.
            print("%s %s" % (message, n))
            break  # Careful: everything after this line is ignored.
        T = parse(w)
        rules.update_set(T.rule_set(), with_freq=True, freq=c)
    if end > 0:
        return rules
    rules.save(bz2.BZ2File(GRAMMAR_DIR + '/grammar.cfg.bz2', 'wb'))
def parallel_buildpcfg(password_dictionary):
    """Build the grammar from ``password_dictionary`` with a process pool.

    The first 10**6 lines are split into chunks of 10000 lines.  Each chunk
    is handed to ``wraper_buildpcfg`` in a worker process, the returned rule
    sets are merged into one ``RuleSet``, and the result is saved to
    ``GRAMMAR_DIR + '/grammar.cfg.bz2'``.

    Args:
        password_dictionary: dictionary path forwarded to every worker.
    """
    from multiprocessing import Pool

    load_each = 10000
    # NOTE(review): assumes wraper_buildpcfg unpacks each tuple into
    # buildpcfg(passwd_dictionary, start, end) -- confirm.  buildpcfg
    # processes line `end` itself, so each chunk stops at
    # c + load_each - 1; otherwise adjacent chunks would both count the
    # boundary line.
    chunks = [(password_dictionary, c, c + load_each - 1)
              for c in range(0, 10**6, load_each)]

    pool = Pool()
    try:
        partial_sets = pool.map(wraper_buildpcfg, chunks)
    finally:
        # map() has returned (or raised), so no results are still pending;
        # shut the workers down instead of leaking them.
        pool.terminate()
        pool.join()

    complete_grammar = RuleSet()
    for partial in partial_sets:
        complete_grammar.update_set(partial, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    complete_grammar.save(bz2.BZ2File(out_grammar_fl, 'wb'))
def parallel_buildpcfg(password_dictionary):
    """Build the grammar from ``password_dictionary`` with a process pool.

    The first 10**6 lines are split into chunks of 10000 lines.  Each chunk
    is handed to ``wraper_buildpcfg`` in a worker process, the returned rule
    sets are merged into one ``RuleSet``, and the result is saved to
    ``GRAMMAR_DIR + '/grammar.cfg.bz2'``.

    Args:
        password_dictionary: dictionary path forwarded to every worker.
    """
    from multiprocessing import Pool

    load_each = 10000
    # NOTE(review): assumes wraper_buildpcfg unpacks each tuple into
    # buildpcfg(passwd_dictionary, start, end) -- confirm.  buildpcfg
    # processes line `end` itself, so each chunk stops at
    # c + load_each - 1; otherwise adjacent chunks would both count the
    # boundary line.
    chunks = [(password_dictionary, c, c + load_each - 1)
              for c in range(0, 10**6, load_each)]

    pool = Pool()
    try:
        partial_sets = pool.map(wraper_buildpcfg, chunks)
    finally:
        # map() has returned (or raised), so no results are still pending;
        # shut the workers down instead of leaking them.
        pool.terminate()
        pool.join()

    complete_grammar = RuleSet()
    for partial in partial_sets:
        complete_grammar.update_set(partial, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    complete_grammar.save(bz2.BZ2File(out_grammar_fl, 'wb'))