Beispiel #1
0
def buildpcfg(passwd_dictionary, start=0, end=-1):
    """Build a grammar RuleSet from a frequency-annotated password list.

    Each input line is expected to look like ``<count> <password>``.  Lines
    that do not start with an integer count followed by at least one more
    field, and lines whose password is not pure ASCII, are skipped.

    Args:
        passwd_dictionary: Value handed to ``open_`` to obtain the lines.
        start: 0-based index of the first line to process.
        end: 0-based index of the last line to process (inclusive).  A
            negative value (the default) means "no upper bound".

    Returns:
        The populated RuleSet when ``end > 0``.  Otherwise the grammar is
        written to ``GRAMMAR_DIR + '/grammar.cfg.bz2'`` and ``None`` is
        returned.
    """
    # MIN_COUNT is a module-level threshold (not defined in this function).
    R = RuleSet()
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    # Line index after which check_resource() is consulted next.
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n < start:
            continue
        # Only enforce the upper bound when one was given: the previous
        # unconditional `n > end` test stopped at line 0 for end=-1.
        if end >= 0 and n > end:
            break
        if n > resource_tracker:
            # A falsy result aborts the build; otherwise the result is the
            # number of further lines to process before checking again.
            l = check_resource(n)
            if not l:
                break
            resource_tracker += l
        fields = line.strip().split()
        if len(fields) < 2 or not fields[0].isdigit():
            continue
        w, c = ' '.join(fields[1:]), int(fields[0])
        try:
            # Python 2 byte string: decoding fails for non-ASCII passwords.
            w.decode('ascii')
        except UnicodeDecodeError:
            continue
        if c < MIN_COUNT:
            # Single parenthesised expression: same output as the original
            # `print msg, n` statement, and valid syntax on Python 3 too.
            print("%s %s" % ("Word frequency dropped to %d for %s" % (c, w), n))
            # Stops the whole scan, not just this line.
            # NOTE(review): presumably the input is sorted by descending
            # count -- confirm, otherwise later frequent words are lost.
            break
        T = parse(w)
        R.update_set(T.rule_set(), with_freq=True, freq=c)
    if end > 0:
        return R
    out = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        R.save(out)
    finally:
        # Close explicitly so the compressed stream is flushed to disk.
        out.close()
Beispiel #2
0
def buildOurpcfg(filename, max_line=2555000, out_filename="temp1.cfg"):
    """Train a PCFG rule set from a password-frequency file and save it.

    Every non-blank line must have the form ``<password> <count>``: the text
    after the last space is the integer occurrence count and everything
    before it is the password (so passwords may themselves contain spaces).
    Blank lines are skipped.

    Args:
        filename: Path of the training file.
        max_line: Maximum number of lines to read from the file.
        out_filename: Path the bz2-compressed rule set is written to.

    Raises:
        ValueError: If a non-blank line has no space separator or its count
            is not an integer.
    """
    # Container for the rules and their frequencies.
    rule_set = RuleSet()

    # `with` guarantees the training file is closed, even on a parse error.
    with open(filename, 'r') as fp:
        for i, line in enumerate(fp):
            # `>=` so that at most `max_line` lines are consumed.
            if i >= max_line:
                break

            record = line.strip()
            if not record:
                continue

            # Split on the LAST space only: password first, count second.
            password, count = record.rsplit(' ', 1)

            p = parse(password)

            # Register the rules of this password, weighted by its count.
            rule_set.update_set(p.rule_set(), with_freq=True, freq=int(count))

    # Training finished: persist the rule set.
    out = bz2.BZ2File(out_filename, "wb")
    try:
        rule_set.save(out)
    finally:
        # Close explicitly so the compressed stream is flushed to disk.
        out.close()
Beispiel #3
0
def buildpcfg(passwd_dictionary, start=0, end=-1):
    """Build a grammar RuleSet from a frequency-annotated password list.

    Each input line is expected to look like ``<count> <password>``.  Lines
    that do not start with an integer count followed by at least one more
    field, and lines whose password is not pure ASCII, are skipped.

    Args:
        passwd_dictionary: Value handed to ``open_`` to obtain the lines.
        start: 0-based index of the first line to process.
        end: 0-based index of the last line to process (inclusive).  A
            negative value (the default) means "no upper bound".

    Returns:
        The populated RuleSet when ``end > 0``.  Otherwise the grammar is
        written to ``GRAMMAR_DIR + '/grammar.cfg.bz2'`` and ``None`` is
        returned.
    """
    # MIN_COUNT is a module-level threshold (not defined in this function).
    R = RuleSet()
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    # Line index after which check_resource() is consulted next.
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n < start:
            continue
        # Only enforce the upper bound when one was given: the previous
        # unconditional `n > end` test stopped at line 0 for end=-1.
        if end >= 0 and n > end:
            break
        if n > resource_tracker:
            # A falsy result aborts the build; otherwise the result is the
            # number of further lines to process before checking again.
            l = check_resource(n)
            if not l:
                break
            resource_tracker += l
        fields = line.strip().split()
        if len(fields) < 2 or not fields[0].isdigit():
            continue
        w, c = ' '.join(fields[1:]), int(fields[0])
        try:
            # Python 2 byte string: decoding fails for non-ASCII passwords.
            w.decode('ascii')
        except UnicodeDecodeError:
            continue
        if c < MIN_COUNT:
            # Single parenthesised expression: same output as the original
            # `print msg, n` statement, and valid syntax on Python 3 too.
            print("%s %s" % ("Word frequency dropped to %d for %s" % (c, w), n))
            # Stops the whole scan, not just this line.
            # NOTE(review): presumably the input is sorted by descending
            # count -- confirm, otherwise later frequent words are lost.
            break
        T = parse(w)
        R.update_set(T.rule_set(), with_freq=True, freq=c)
    if end > 0:
        return R
    out = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        R.save(out)
    finally:
        # Close explicitly so the compressed stream is flushed to disk.
        out.close()
Beispiel #4
0
def parallel_buildpcfg(password_dictionary):
    """Build the grammar in parallel over fixed-size slices of the input.

    The first 10**6 lines of ``password_dictionary`` are divided into slices
    of ``load_each`` lines; each slice is handed to ``wraper_buildpcfg`` in a
    worker process, and the per-slice results are merged into one RuleSet
    that is written to ``GRAMMAR_DIR + '/grammar.cfg.bz2'``.

    Args:
        password_dictionary: Password list forwarded to every worker.
    """
    from multiprocessing import Pool
    load_each = 10000
    # buildpcfg treats its `end` argument as inclusive (`if n > end: break`),
    # so each slice ends at c + load_each - 1; using c + load_each made
    # neighbouring slices both count the boundary line.
    # NOTE(review): assumes wraper_buildpcfg forwards the tuple to
    # buildpcfg(passwd_dictionary, start, end) -- confirm.
    a = [(password_dictionary, c, c + load_each - 1)
         for c in range(0, 10**6, load_each)]
    p = Pool()
    try:
        R = p.map(wraper_buildpcfg, a)
    finally:
        # Release the worker processes instead of leaking them.
        p.close()
        p.join()
    Complete_grammar = RuleSet()
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    out = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        Complete_grammar.save(out)
    finally:
        # Close explicitly so the compressed stream is flushed to disk.
        out.close()
Beispiel #5
0
def parallel_buildpcfg(password_dictionary):
    """Build the grammar in parallel over fixed-size slices of the input.

    The first 10**6 lines of ``password_dictionary`` are divided into slices
    of ``load_each`` lines; each slice is handed to ``wraper_buildpcfg`` in a
    worker process, and the per-slice results are merged into one RuleSet
    that is written to ``GRAMMAR_DIR + '/grammar.cfg.bz2'``.

    Args:
        password_dictionary: Password list forwarded to every worker.
    """
    from multiprocessing import Pool
    load_each = 10000
    # buildpcfg treats its `end` argument as inclusive (`if n > end: break`),
    # so each slice ends at c + load_each - 1; using c + load_each made
    # neighbouring slices both count the boundary line.
    # NOTE(review): assumes wraper_buildpcfg forwards the tuple to
    # buildpcfg(passwd_dictionary, start, end) -- confirm.
    a = [(password_dictionary, c, c + load_each - 1)
         for c in range(0, 10**6, load_each)]
    p = Pool()
    try:
        R = p.map(wraper_buildpcfg, a)
    finally:
        # Release the worker processes instead of leaking them.
        p.close()
        p.join()
    Complete_grammar = RuleSet()
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    out = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        Complete_grammar.save(out)
    finally:
        # Close explicitly so the compressed stream is flushed to disk.
        out.close()