Example #1
0
def buildpcfg(passwd_dictionary, start=0, end=-1):
    #MIN_COUNT=1000
    R = RuleSet()
    # resource track
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n<start: continue
        if n>end: break
        if n>resource_tracker:
            l = check_resource(n)
            if not l:
                break
            else:
                resource_tracker += l
        # if n%1000==0: print n;
        line = line.strip().split()
        if len(line) > 1 and line[0].isdigit():
            w, c = ' '.join(line[1:]), int(line[0])
        else:
            continue
            w, c = ' '.join(line), 1
        try:
            w.decode('ascii')
        except UnicodeDecodeError:
            continue    # not ascii hence return
        if c < MIN_COUNT : # or (len(w) > 2 and not w[:-2].isalnum() and len(re.findall(allowed_sym, w)) == 0):
            print "Word frequency dropped to %d for %s" % (c, w), n
            break  # Careful!!!
        T = parse(w)
        R.update_set(T.rule_set(), with_freq=True, freq=c)
    if end>0: return R
    R.save(bz2.BZ2File(out_grammar_fl, 'wb'))
Example #2
0
 def rule_set(self):
     """Return the rules contributed by this node as a ``RuleSet``.

     Always records ``L -> self.prod``.  When the production equals the
     string ``'l33t'``, additionally records ``L_<c> -> d`` for every pair
     ``(c, d)`` obtained by zipping ``self.l`` with ``self.r``.
     """
     rs = RuleSet()
     rs.add_rule('L', self.prod)
     # Compare by value: ``is`` tests object identity and only happens to
     # work for strings when the interpreter interned both operands.
     if self.prod == 'l33t':
         for c, d in zip(self.l, self.r):
             rs.add_rule('L_%s' % c, d)
     return rs
Example #3
0
 def __init__(self, base_pcfg):
     """Initialise from ``base_pcfg``, copying only its letter rules.

     The internal rule set receives the ``'L'`` entry of ``base_pcfg`` and
     the entries ``'L_a'`` through ``'L_z'``; it is exposed as ``self.R``
     and its ``G`` attribute as ``self.G``.
     """
     self.cal_cdf = False
     self.base_pcfg = base_pcfg

     grammar = RuleSet()
     letter_keys = ['L'] + ['L_%s' % ch for ch in string.ascii_lowercase]
     for key in letter_keys:
         grammar.update_set(RuleSet(d={key: base_pcfg[key]}))

     self.R = grammar
     self.G = grammar.G
     self.date = Date()
     self.freeze = False
Example #4
0
def parallel_buildpcfg(password_dictionary):
    """Build the grammar from ``password_dictionary`` with a process pool.

    Line indices 0 .. 10**6 are cut into ``(path, start, start + load_each)``
    work items, ``wraper_buildpcfg`` is mapped over them, and the returned
    rule sets are merged (with frequencies) into one ``RuleSet`` that is
    written to ``GRAMMAR_DIR/grammar.cfg.bz2``.

    NOTE(review): consecutive work items share their boundary index
    (``c + load_each`` of one equals ``c`` of the next).  Whether that line
    is counted twice depends on how ``wraper_buildpcfg`` treats ``end`` --
    confirm against its definition.
    """
    from multiprocessing import Pool
    load_each = 10000
    a = [(password_dictionary, c, c + load_each)
         for c in range(0, 10**6, load_each)]
    p = Pool()
    try:
        R = p.map(wraper_buildpcfg, a)
    finally:
        # Release the worker processes instead of leaving them to the GC.
        p.close()
        p.join()
    Complete_grammar = RuleSet()
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    outf = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        Complete_grammar.save(outf)
    finally:
        # Closing flushes the compressed stream to disk.
        outf.close()
Example #5
0
 def __init__(self, base_pcfg):
     """Set up a rule set seeded with the letter rules of ``base_pcfg``.

     Copies the ``'L'`` entry and each ``'L_<lowercase letter>'`` entry of
     ``base_pcfg`` into a fresh ``RuleSet`` stored as ``self.R``;
     ``self.G`` aliases that rule set's ``G`` attribute.
     """
     self.cal_cdf = False
     self.base_pcfg = base_pcfg

     seeded = RuleSet()
     seeded.update_set(RuleSet(d={'L': base_pcfg['L']}))
     for key in ('L_%s' % letter for letter in string.ascii_lowercase):
         seeded.update_set(RuleSet(d={key: base_pcfg[key]}))

     self.R = seeded
     self.G = seeded.G
     self.date = Date()
     self.freeze = False
Example #6
0
 def rule_set(self):  # collect rules
     """Return the rules produced by this node as a ``RuleSet``.

     A ``NonT`` instance first records ``G -> self.sym``.  A ``str``
     production then adds ``self.sym -> self.prod``; a ``list`` production
     merges the rule set of every element.  Any other production is
     delegated to entirely: its own ``rule_set()`` result is returned and
     the locally built rules are not included.
     """
     rules = RuleSet()
     if isinstance(self, NonT):  # base rule, i.e. nothing else recorded yet
         rules.add_rule('G', self.sym)

     production = self.prod
     if isinstance(production, str):
         rules.add_rule(self.sym, production)
         return rules
     if isinstance(production, list):
         for child in production:
             rules.update_set(child.rule_set())
         return rules
     return production.rule_set()
Example #7
0
 def rule_set(self):
     """Return the rules produced by this node as a ``RuleSet``.

     A ``NonT`` instance first records ``G -> self.sym``.  A string
     production then adds ``self.sym -> self.prod``; a ``list`` production
     merges the rule set of every element.  Any other production is
     delegated to entirely: its own ``rule_set()`` result is returned and
     the locally built rules are not included.
     """
     rules = RuleSet()
     if isinstance(self, NonT):
         rules.add_rule('G', self.sym)

     production = self.prod
     if isinstance(production, basestring):
         rules.add_rule(self.sym, production)
         return rules
     if isinstance(production, list):
         for child in production:
             rules.update_set(child.rule_set())
         return rules
     return production.rule_set()
Example #8
0
 def rule_set(self):
     """Return the rules contributed by this node as a ``RuleSet``.

     Always records ``L -> self.prod``.  When the production equals the
     string ``'l33t'``, additionally records ``L_<c> -> d`` for every pair
     ``(c, d)`` obtained by zipping ``self.l`` with ``self.r``.
     """
     rs = RuleSet()
     rs.add_rule('L', self.prod)
     # Compare by value: ``is`` tests object identity and only happens to
     # work for strings when the interpreter interned both operands.
     if self.prod == 'l33t':
         for c, d in zip(self.l, self.r):
             rs.add_rule('L_%s' % c, d)
     return rs
Example #9
0
def buildpcfg(passwd_dictionary, start=0, end=-1):
    #MIN_COUNT=1000
    R = RuleSet()
    # resource track
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    resource_tracker = 5240
    for n, line in enumerate(open_(passwd_dictionary)):
        if n < start: continue
        if n > end: break
        if n > resource_tracker:
            l = check_resource(n)
            if not l:
                break
            else:
                resource_tracker += l
        # if n%1000==0: print n;
        line = line.strip().split()
        if len(line) > 1 and line[0].isdigit():
            w, c = ' '.join(line[1:]), int(line[0])
        else:
            continue
            w, c = ' '.join(line), 1
        try:
            w.decode('ascii')
        except UnicodeDecodeError:
            continue  # not ascii hence return
        if c < MIN_COUNT:  # or (len(w) > 2 and not w[:-2].isalnum() and len(re.findall(allowed_sym, w)) == 0):
            print "Word frequency dropped to %d for %s" % (c, w), n
            break  # Careful!!!
        T = parse(w)
        R.update_set(T.rule_set(), with_freq=True, freq=c)
    if end > 0: return R
    R.save(bz2.BZ2File(out_grammar_fl, 'wb'))
Example #10
0
def buildOurpcfg(filename):
    """Train a PCFG rule set from a password file and save it.

    Each non-blank line is expected to hold a password followed by a
    single space and its occurrence count.  At most ``max_line + 1`` lines
    are read.  The resulting ``RuleSet`` is written, bz2-compressed, to
    ``temp1.cfg`` in the current working directory.

    Args:
        filename: path of the training file.

    Raises:
        ValueError: if a non-blank line has no space-separated count or
            the count is not an integer.
    """
    # Container for the learned rules and their frequencies.
    rule_set = RuleSet()

    # Upper bound on the line index that will be read.
    max_line = 2555000

    with open(filename, 'r') as fp:
        for i, line in enumerate(fp):
            if i > max_line:
                break

            record = line.strip()
            if not record:
                # Blank lines carry no training data.
                continue

            # The count is the last space-separated field; splitting from
            # the right keeps passwords that themselves contain spaces.
            password, count = record.rsplit(' ', 1)

            tree = parse(password)

            # Merge this password's rules, weighted by its count.
            rule_set.update_set(tree.rule_set(), with_freq=True,
                                freq=int(count))

    # Training finished: persist the grammar and flush it to disk.
    outf = bz2.BZ2File("temp1.cfg", "wb")
    try:
        rule_set.save(outf)
    finally:
        outf.close()
Example #11
0
 def rule_set(self):
     """Collect this node's rules into a ``RuleSet``.

     For a ``NonT`` node the rule ``G -> self.sym`` is added first.  The
     production then decides the rest: a string yields
     ``self.sym -> self.prod``, a ``list`` contributes the merged rule sets
     of its elements, and anything else short-circuits by returning that
     object's own ``rule_set()`` (discarding what was built locally).
     """
     collected = RuleSet()
     if isinstance(self, NonT):
         collected.add_rule('G', self.sym)

     prod = self.prod
     if not isinstance(prod, (basestring, list)):
         return prod.rule_set()

     if isinstance(prod, basestring):
         collected.add_rule(self.sym, prod)
     else:
         for part in prod:
             collected.update_set(part.rule_set())
     return collected
Example #12
0
def parallel_buildpcfg(password_dictionary):
    """Build the grammar from ``password_dictionary`` with a process pool.

    Line indices 0 .. 10**6 are cut into ``(path, start, start + load_each)``
    work items, ``wraper_buildpcfg`` is mapped over them, and the returned
    rule sets are merged (with frequencies) into one ``RuleSet`` that is
    written to ``GRAMMAR_DIR/grammar.cfg.bz2``.

    NOTE(review): consecutive work items share their boundary index
    (``c + load_each`` of one equals ``c`` of the next).  Whether that line
    is counted twice depends on how ``wraper_buildpcfg`` treats ``end`` --
    confirm against its definition.
    """
    from multiprocessing import Pool
    load_each = 10000
    a = [(password_dictionary, c, c + load_each)
         for c in range(0, 10**6, load_each)]
    p = Pool()
    try:
        R = p.map(wraper_buildpcfg, a)
    finally:
        # Release the worker processes instead of leaving them to the GC.
        p.close()
        p.join()
    Complete_grammar = RuleSet()
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    outf = bz2.BZ2File(out_grammar_fl, 'wb')
    try:
        Complete_grammar.save(outf)
    finally:
        # Closing flushes the compressed stream to disk.
        outf.close()
Example #13
0
    def __init__(self, base_pcfg):
        """Seed a rule set with the default entries of ``base_pcfg``.

        Copied entries:
          * ``'L'`` and ``'L_a'`` .. ``'L_z'``;
          * for every key of ``base_pcfg['G']`` ending in ``',G'``: that
            key and the key with the two-character suffix removed are
            written into ``R.G['G']``, and the ``base_pcfg`` entry for the
            suffix-free key is merged in.

        The names of all copied keys are stored in ``self._default_keys``.
        """
        self.cal_cdf = False
        self.base_pcfg = base_pcfg
        grammar = RuleSet()

        # Letter rules: L and L_*
        letter_keys = ['L'] + ['L_%s' % ch for ch in string.ascii_lowercase]
        for key in letter_keys:
            grammar.update_set(RuleSet(d={key: base_pcfg[key]}))
        seen_keys = set(letter_keys)

        # Start-symbol rules of the form "<X>,G" together with their "<X>".
        start_rules = base_pcfg['G']
        for rhs, freq in start_rules.items():
            if not rhs.endswith(',G'):
                continue
            head = rhs[:-2]
            grammar.G['G'][rhs] = freq
            grammar.G['G'][head] = start_rules[head]
            seen_keys.update((rhs, head))
            grammar.update_set(RuleSet(d={head: base_pcfg[head]}))

        self._default_keys = seen_keys
        self.R = grammar
        self.G = grammar.G
        self.date = Date()
        self.freeze = False
Example #14
0
 def rule_set(self, word):
     """Return a ``RuleSet`` built from the parse tree of ``word``.

     Every entry of ``self.l_parse_tree(word).tree`` is unpacked into the
     positional arguments of ``RuleSet.add_rule``.
     """
     rules = RuleSet()
     for rule_args in self.l_parse_tree(word).tree:
         rules.add_rule(*rule_args)
     return rules
Example #15
0
 def rule_set(self):
     """Return this node's rules.

     A string production yields ``RuleSet(self.sym, self.prod)``; any
     other production is asked for its own ``rule_set()``.
     """
     production = self.prod
     if not isinstance(production, basestring):
         return production.rule_set()
     return RuleSet(self.sym, production)
Example #16
0
    a = [(password_dictionary, c, c+load_each)
         for c in range(0, 10**6, load_each)]
    R = p.map(wraper_buildpcfg, a)
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    Complete_grammar.save(bz2.BZ2File(out_grammar_fl, 'wb'))


if __name__ == "__main__":
    if sys.argv[1] == '-buildG':
        print buildpcfg(sys.argv[2], 0, 100)
    elif sys.argv[1] == '-buildparallelG':
        parallel_buildpcfg(sys.argv[2])
    elif sys.argv[1]=='-file':
        R = RuleSet()
        with open_(sys.argv[2]) as f:
            for i, line in enumerate(f):
                if i<5000: continue
                l = line.strip().split()
                w, c = ' '.join(l[1:]), int(l[0])
                try: w.decode('ascii')
                except UnicodeDecodeError:
                    continue    # not ascii hence return
                if not w or len(w.strip())<1:
                    continue
                T = parse(w)
                R.update_set(T.rule_set(), with_freq=True, freq=c)
                if i%100==0: print i
                if i>5200: break
        print R
Example #17
0
 def rule_set(self):
     """Return ``self.sym -> self.prod`` merged with the rules of ``self.L``."""
     rules = RuleSet()
     rules.add_rule(self.sym, self.prod)
     nested = self.L.rule_set()
     rules.update_set(nested)
     return rules
Example #18
0
 def rule_set(self):
     """Build a ``RuleSet`` for this node.

     The set holds the rule ``self.sym -> self.prod`` and is then updated
     with whatever ``self.L.rule_set()`` returns.
     """
     combined = RuleSet()
     combined.add_rule(self.sym, self.prod)
     inner_rules = self.L.rule_set()
     combined.update_set(inner_rules)
     return combined
Example #19
0
 def rule_set(self, word):
     """Convert the parse tree of ``word`` into a ``RuleSet``.

     ``self.l_parse_tree(word)`` supplies a ``tree`` iterable; each of its
     items is splatted into ``RuleSet.add_rule``.
     """
     result = RuleSet()
     parse_tree = self.l_parse_tree(word)
     for entry in parse_tree.tree:
         result.add_rule(*entry)
     return result
Example #20
0
    a = [(password_dictionary, c, c + load_each)
         for c in range(0, 10**6, load_each)]
    R = p.map(wraper_buildpcfg, a)
    for r in R:
        Complete_grammar.update_set(r, with_freq=True)
    out_grammar_fl = GRAMMAR_DIR + '/grammar.cfg.bz2'
    Complete_grammar.save(bz2.BZ2File(out_grammar_fl, 'wb'))


if __name__ == "__main__":
    if sys.argv[1] == '-buildG':
        print buildpcfg(sys.argv[2], 0, 100)
    elif sys.argv[1] == '-buildparallelG':
        parallel_buildpcfg(sys.argv[2])
    elif sys.argv[1] == '-file':
        R = RuleSet()
        with open_(sys.argv[2]) as f:
            for i, line in enumerate(f):
                if i < 5000: continue
                l = line.strip().split()
                w, c = ' '.join(l[1:]), int(l[0])
                try:
                    w.decode('ascii')
                except UnicodeDecodeError:
                    continue  # not ascii hence return
                if not w or len(w.strip()) < 1:
                    continue
                T = parse(w)
                R.update_set(T.rule_set(), with_freq=True, freq=c)
                if i % 100 == 0: print i
                if i > 5200: break
Example #21
0
    def __init__(self, base_pcfg):
        """Create the object and preload default rules from ``base_pcfg``.

        The internal ``RuleSet`` receives ``'L'``, ``'L_a'`` .. ``'L_z'``
        and, for each key ``k`` of ``base_pcfg['G']`` that ends with
        ``',G'``, both ``k`` and ``k[:-2]`` under ``R.G['G']`` plus the
        ``base_pcfg`` entry named ``k[:-2]``.  All of these key names end
        up in ``self._default_keys``.
        """
        self.cal_cdf = False
        self.base_pcfg = base_pcfg
        rules = RuleSet()

        # default keys: L followed by L_*
        defaults = ['L']
        defaults.extend('L_%s' % letter for letter in string.ascii_lowercase)
        for name in defaults:
            rules.update_set(RuleSet(d={name: base_pcfg[name]}))
        default_names = set(defaults)

        g_rules = base_pcfg['G']
        for full_key, weight in g_rules.items():
            if full_key.endswith(',G'):
                bare_key = full_key[:-2]  # key without the ",G" suffix
                rules.G['G'][full_key] = weight
                rules.G['G'][bare_key] = g_rules[bare_key]
                default_names.add(full_key)
                default_names.add(bare_key)
                rules.update_set(RuleSet(d={bare_key: base_pcfg[bare_key]}))

        self._default_keys = default_names
        self.R = rules
        self.G = rules.G
        self.date = Date()
        self.freeze = False