class rule_seq_match(redact_rule):
    """Redacts any sequence that matches the given pattern.

    The pattern is registered with Lightgrep as a non-fixed-string,
    case-insensitive key and is searched for in the US-ASCII, UTF-8,
    UTF-16LE and ISO-8859-1 encodings.
    """

    def __init__(self, line, lgpattern):
        """Create the Lightgrep search context for ``lgpattern``.

        line      -- passed through unchanged to ``redact_rule.__init__``.
        lgpattern -- the pattern text to search for; also kept on the
                     instance as ``self.lgpattern``.
        """
        redact_rule.__init__(self, line)
        # Lazy %-args: the emitted message is the same as before, but the
        # string is only formatted if the debug record is actually handled.
        logging.debug("Creating lightgrep-based rule for pattern: %s", lgpattern)
        self.lg = Lightgrep()
        self.accum = HitAccumulator()
        self.lgpattern = lgpattern
        # NOTE(review): the meaning of `complete` is defined by the rule
        # framework outside this class -- confirm against redact_rule.
        self.complete = False
        pats = [
            (
                lgpattern,
                ['US-ASCII', 'UTF-8', 'UTF-16LE', 'ISO-8859-1'],
                KeyOpts(fixedString=False, caseInsensitive=True),
            ),
        ]
        prog, pmap = Lightgrep.createProgram(pats)
        self.lg.createContext(prog, pmap, self.accum.lgCallback)

    def should_redact(self, fileobject):
        """Return True if the pattern occurs in ``fileobject.contents()``.

        The contents are searched with the Lightgrep context built in
        ``__init__``; the result is whether ``searchBuffer`` reported more
        than zero hits.  The accumulator is handed to the search, and
        ``runs_to_redact`` later reads the hits from it.
        """
        hitcount = self.lg.searchBuffer(fileobject.contents(), self.accum)
        return hitcount > 0

    def runs_to_redact(self, fi):
        """Overridden to return the byte runs of just the matched sequences.

        Collects the distinct ``(start, end)`` spans from the accumulated
        hits (first-seen order is kept), resets the Lightgrep context and the
        accumulator, and returns whatever ``get_runs_for_file_sequences``
        produces for ``fi`` and those spans.
        """
        # The same span can be reported more than once; keep each span once.
        # A set gives O(1) membership tests instead of rescanning the list
        # for every hit.
        seen = set()
        red_seqs = []
        for h in self.accum.Hits:
            span = (h['start'], h['end'])
            if span not in seen:
                seen.add(span)
                red_seqs.append(span)
        # Clear search state before mapping spans so the rule is ready for
        # the next file even if the mapping step raises.
        self.lg.reset()
        self.accum.reset()
        return get_runs_for_file_sequences(fi, red_seqs)
def __init__(self, line, lgpattern):
    """Set up a Lightgrep context that searches for ``lgpattern``.

    ``line`` is forwarded to ``redact_rule.__init__``.  The pattern is
    registered as a non-fixed-string, case-insensitive key in four
    encodings, and hits are delivered to this rule's accumulator callback.
    """
    redact_rule.__init__(self, line)
    logging.debug("Creating lightgrep-based rule for pattern: " + lgpattern)

    self.lg = engine = Lightgrep()
    self.accum = collector = HitAccumulator()
    self.lgpattern = lgpattern
    self.complete = False

    # One key: (pattern text, encodings to search, key options).
    encodings = ['US-ASCII', 'UTF-8', 'UTF-16LE', 'ISO-8859-1']
    key_options = KeyOpts(fixedString=False, caseInsensitive=True)
    keys = [(lgpattern, encodings, key_options)]

    program, pattern_map = Lightgrep.createProgram(keys)
    engine.createContext(program, pattern_map, collector.lgCallback)
def __init__(self, line, text):
    """Set up a Lightgrep context that searches for the literal ``text``.

    ``line`` is forwarded to ``redact_rule.__init__``.  The text is
    registered as a fixed-string, case-sensitive key in four encodings, and
    hits are delivered to this rule's accumulator callback.
    """
    redact_rule.__init__(self, line)
    logging.debug("Creating lightgrep-based rule for fixed string: " + text)

    self.lg = engine = Lightgrep()
    self.accum = collector = HitAccumulator()
    self.complete = False

    encodings = ['US-ASCII', 'UTF-8', 'UTF-16LE', 'ISO-8859-1']
    # Note reliance on fixedString keyopt below
    key_options = KeyOpts(fixedString=True, caseInsensitive=False)
    keys = [(text, encodings, key_options)]

    program, pattern_map = Lightgrep.createProgram(keys)
    engine.createContext(program, pattern_map, collector.lgCallback)