class Fa(InterfaceFa):
    def __init__(self, automaton):
        self.__automaton = automaton
        # Dispatch to the deterministic or nondeterministic implementation.
        # Comparing with 'is True' / 'is False' keeps the TypeError branch
        # reachable for non-boolean values of the "deterministic" flag.
        if self.automaton["deterministic"] is True:
            self.heritage = Dfa(automaton)
        elif self.automaton["deterministic"] is False:
            self.heritage = Nfa(automaton)
        else:
            raise TypeError("'deterministic' must be a boolean")

    def read(self, word):
        return self.heritage.read(word)

    @property
    def automaton(self):
        return self.__automaton

    @property
    def states(self):
        return self.heritage.states

    @property
    def alphabet(self):
        return self.heritage.alphabet

    @property
    def dictionary(self):
        return self.heritage.dictionary

    def __repr__(self):
        # __repr__ must return a string, not the underlying dict
        return repr(self.heritage.dictionary)

    def __str__(self):
        return self.heritage.__str__()
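# A minimal usage sketch for Fa. The dictionary schema below is an
# assumption inferred from the constructor (only the "deterministic" key is
# actually read here); the remaining keys mirror what Dfa/Nfa would
# plausibly consume and are hypothetical.
example_automaton = {
    "deterministic": True,      # dispatches to Dfa; False would pick Nfa
    "states": ["q0", "q1"],           # hypothetical key
    "alphabet": ["a", "b"],           # hypothetical key
    "initial_states": ["q0"],         # hypothetical key
    "final_states": ["q1"],           # hypothetical key
}
# fa = Fa(example_automaton)
# fa.read("ab")   # delegated to the wrapped Dfa/Nfa instance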
def eval_nfa(target, fname, test, train, method, args):
    # 'train' is forwarded as the second positional argument of 'method':
    # a pcap filename for armc, or a precomputed frequency dict for
    # reduce_nfa (see armc_vs_merge_vs_prune).
    a = Nfa.parse(target)
    _, m = method(a, train, **args)
    with open(fname, 'w') as f:
        a.print(f)
    r = Nfa.eval_accuracy(target, fname, test)
    _, _, total, _, _, fp, tp = r.split(',')
    print(fname, round(int(fp) / int(total), 4), m, sep=',')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output', type=str, metavar='FILE',
                        default="automaton.fa", help='output file')
    parser.add_argument('input', metavar='NFA', type=str)
    parser.add_argument('-s', '--sub', type=str, metavar='xNFA',
                        help='check whether L(NFA) is a subset of L(xNFA)')
    args = parser.parse_args()

    if args.sub:
        jarfile = search_for_file('RABIT.jar')
        if jarfile is None:
            sys.stderr.write('Error: cannot find RABIT tool in this directory\n')
            sys.exit(1)

        aut1 = Nfa.parse(args.input)
        aut2 = Nfa.parse(args.sub)
        aut1.selfloop_to_finals()
        aut2.selfloop_to_finals()

        aut1_ba = tempfile.NamedTemporaryFile()
        aut2_ba = tempfile.NamedTemporaryFile()
        # write through 'with' blocks so the files are flushed and closed
        # before RABIT reads them
        with open(aut1_ba.name, 'w') as f:
            aut1.print(f, how='ba')
        with open(aut2_ba.name, 'w') as f:
            aut2.print(f, how='ba')

        proc = 'java -jar ' + jarfile + ' ' + aut1_ba.name + ' ' + \
            aut2_ba.name + ' -fast -finite'
        subprocess.call(proc.split())
    else:
        jarfile = search_for_file('Reduce.jar')
        if jarfile is None:
            sys.stderr.write('Error: cannot find Reduce tool in this directory\n')
            sys.exit(1)
        if not args.output:
            sys.stderr.write('Error: no output specified\n')
            exit(1)

        ba_file = tempfile.NamedTemporaryFile()
        reduce_file = tempfile.NamedTemporaryFile()

        aut = Nfa.parse(args.input, 'fa')
        aut.extend_final_states()
        write_output(ba_file.name, aut.write(how='ba'))

        proc = "java -jar " + jarfile + " " + ba_file.name + \
            " 10 -sat -finite -o " + reduce_file.name
        subprocess.call(proc.split())

        aut = Nfa.parse(reduce_file.name, 'ba')
        aut.retrieve_final_states()
        write_output(args.output, aut.write())
def timbuk2fa(fname):
    nfa = Nfa()
    smap = dict()
    cnt = 0

    def add_state(p):
        # map original state names to consecutive integer ids
        nonlocal smap
        nonlocal cnt
        if p not in smap:
            smap[p] = cnt
            cnt += 1
        return smap[p]

    with open(fname, 'r') as f:
        ts = 0
        for line in f:
            line = line[:-1]
            if line == "":
                continue
            if line.startswith("Final States"):
                for x in line.split()[2:]:
                    nfa._add_final_state(add_state(x))
            if ts == 1:
                # the first line after "Transitions" names the initial state
                init = line.split()[-1]
                nfa._add_initial_state(add_state(init))
                ts = 2
            elif ts == 2:
                a, p, q = re.sub(r'["()\->]', ' ', line).split()
                p = add_state(p)
                q = add_state(q)
                nfa._add_rule(p, q, int(a))
            if line.startswith("Transitions"):
                ts = 1
    return nfa
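# A sketch of the Timbuk-format fragment timbuk2fa expects, reconstructed
# from the parsing logic above rather than from any format documentation:
# final states follow "Final States", the first line after "Transitions"
# names the initial state, and every later line is a rule such as
# "0(q0) -> q1" whose label must be an integer.
SAMPLE_TIMBUK = """\
Final States q2
Transitions
x -> q0
0(q0) -> q1
1(q1) -> q2
"""
# with open('sample.tmb', 'w') as f:
#     f.write(SAMPLE_TIMBUK)
# nfa = timbuk2fa('sample.tmb')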
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input', metavar='NFA', type=str)
    parser.add_argument('output', metavar='OUT', type=str)
    args = parser.parse_args()

    tb_file = tempfile.NamedTemporaryFile()
    min_file = tempfile.NamedTemporaryFile()

    sys.stderr.write('Parsing FA file\n')
    aut = Nfa.parse(args.input)
    sys.stderr.write('Extending final states\n')
    sym = aut.extend_final_states()
    sys.stderr.write('Converting to timbuk format\n')
    fa2timbuk(aut, sym, tb_file.name)
    sys.stderr.write('Parsing timbuk file\n')
    a = sbl.parse(tb_file.name)
    sys.stderr.write('Minimizing\n')
    a.minimize().print_automaton(min_file.name)
    sys.stderr.write('Converting to FA\n')
    aut = timbuk2fa(min_file.name)
    sys.stderr.write('Retrieving final states\n')
    aut.retrieve_final_states()
    write_output(args.output, aut.write())
    sys.stderr.write('Saved as ' + args.output + '\n')
def main():
    if len(sys.argv) < 3:
        raise SystemExit('two arguments required: INPUT OUTPUT')
    a = Nfa.parse(sys.argv[1])
    a.merge_redundant_states()
    with open(sys.argv[2], 'w') as f:
        a.print(f)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output', type=str, metavar='FILE',
                        default="Aut.dot", help='output file')
    parser.add_argument('input', metavar='NFA', type=str)
    parser.add_argument('-f', '--freq', type=str, help='heat map')
    parser.add_argument('-t', '--trans', action='store_true',
                        help='show transition labels')
    parser.add_argument('-r', '--rules', type=int,
                        help='number of rules to show')
    parser.add_argument('-d', '--depth', type=int,
                        help='maximal depth of a state to display')
    parser.add_argument("--dpi", type=int, default=3000,
                        help="output image dpi")
    args = parser.parse_args()

    aut = Nfa.parse(args.input)

    freq = None
    if args.freq:
        freq = get_freq(args.freq)

    states = set(aut.states)
    if args.rules:
        rules = list(aut.fin_pred().values())[0:args.rules]
        states = set(s for subl in rules for s in subl)
    if args.depth:
        depth = aut.state_depth
        states = set(filter(lambda x: depth[x] < args.depth, states))

    out = aut.write_dot(show_trans=args.trans, freq=freq, states=states,
                        freq_scale=lambda x: math.log(x + 2), show_diff=0)
    write_output(args.output, out)

    image = args.output.split('.dot')[0] + '.png'
    prog = 'dot -Tpng ' + args.output + ' -Gdpi=' + str(args.dpi) + \
        ' -o ' + image
    subprocess.call(prog.split())
    prog = 'xdg-open ' + image
    subprocess.call(prog.split())
def testGetAllStayDests():
    treeOfEpsNfaStr = """
q0->q1 : Eps
q1->q2 : Eps
q2->q3 : Eps
q3->q4 : Eps
q2->q5 : Eps
q2->q6 : Eps
"""
    multiChainOfEpsNfaStr = """
q0->q1 : Eps
q1->q2 : Eps
q3->q4 : Eps
q5->q6 : Eps
"""
    cycleOfEpsNfaStr = """
q0->q1 : Eps
q1->q2 : Eps
q2->q3 : Eps
q3->q4 : Eps
q4->q0 : Eps
"""
    chainWithAccept = """
q0->q1 : Eps
q1->q2 : Eps
q2->qA : Eps
"""
    nfaTree = Nfa(treeOfEpsNfaStr)
    nfaCycle = Nfa(cycleOfEpsNfaStr)
    nfaAccept = Nfa(chainWithAccept)
    nfaMulti = Nfa(multiChainOfEpsNfaStr)
    nfa1 = Nfa(rf('example2.nfa'))
    nfa2 = Nfa(rf('mult2or3Gs.nfa'))
    testvals = [
        (nfa1, set(['q0']), frozenset(['q0'])),
        (nfa1, set(['q1']), frozenset(['q1', 'q2'])),
        (nfa2, set(['q0']), frozenset(['q0', 'q1', 'q3'])),
        (nfaTree, set(['q0']),
         frozenset(['q0', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6'])),
        (nfaTree, set(['q1']),
         frozenset(['q1', 'q2', 'q3', 'q4', 'q5', 'q6'])),
        (nfaCycle, set(['q0']),
         frozenset(['q0', 'q1', 'q2', 'q3', 'q4'])),
        (nfaCycle, set(['q1']),
         frozenset(['q0', 'q1', 'q2', 'q3', 'q4'])),
        (nfaAccept, set(['q0']), frozenset(['qA'])),
        (nfaMulti, set(['q0']), frozenset(['q0', 'q1', 'q2'])),
        (nfaMulti, set(['q2']), frozenset(['q2'])),
        (nfaMulti, set(['q1', 'q5']),
         frozenset(['q1', 'q2', 'q5', 'q6'])),
        (nfaMulti, set(['q0', 'q3', 'q5']),
         frozenset(['q0', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6'])),
    ]
    for (nfa, state, solution) in testvals:
        val = getAllStayDests(nfa, state)
        utils.tprint(state, ':', val)
        assert val == solution
def armc_vs_merge_vs_prune():
    ratio = .2
    target = 'automata/sprobe.fa'
    train = 'pcaps/10k.pcap'
    test = 'pcaps/40k.pcap'
    freq = Nfa.parse(target).get_freq(train)
    res_dir = 'experiments/armc'

    eval_nfa(target, os.path.join(res_dir, 'armc_bare_th7.fa'), test, train,
             armc, {'ratio': ratio, 'th': .7, 'prune_empty': 1})
    eval_nfa(target, os.path.join(res_dir, 'armc_prune_th7.fa'), test, train,
             armc, {'ratio': ratio, 'th': .7, 'prune_empty': 0})
    eval_nfa(target, os.path.join(res_dir, 'armc_prune_th5.fa'), test, train,
             armc, {'ratio': ratio, 'th': .5, 'prune_empty': 0})
    eval_nfa(target, os.path.join(res_dir, 'armc_prune_th1.fa'), test, train,
             armc, {'ratio': ratio, 'th': .1, 'prune_empty': 0})
    # reduce_nfa takes the precomputed frequency dict in place of the pcap
    eval_nfa(target, os.path.join(res_dir, 'prune.fa'), test, freq,
             reduce_nfa, {'ratio': ratio, 'merge': 0})
    eval_nfa(target, os.path.join(res_dir, 'merge.fa'), test, freq,
             reduce_nfa, {'ratio': ratio, 'merge': 1})
def testConvertNfatoDfa():
    # format is:
    # (nfaFile, alphabet, maxShortInputLen, maxRandInputLen, numRandInputs)
    testvals = [
        ('chainWithAccept.nfa', 'abc', 4, 20, 100),
        ('CGstar.nfa', 'CAGT', 5, 20, 100),
        ('simple1.nfa', 'CAGT', 6, 12, 1000),
        ('simple2.nfa', 'CAGT', 6, 12, 10000),
        ('simple3.nfa', 'CAGT', 6, 12, 10000),
        ('mult2or3Gs.nfa', 'G', 10, 50, 1000),
        ('example2.nfa', 'CAGT', 9, 20, 10000),
    ]
    numTests = 0
    for (nfaFile, alphabet, maxShortInputLen, maxRandInputLen,
         numRandInputs) in testvals:
        numTests += 1
        if utils.BRIEF_TESTS and numTests > NUM_BRIEF_TESTS:
            break
        nfaStr = rf(nfaFile)
        nfa = Nfa(nfaStr)
        dfaStr = convertNfaToDfa(nfaStr)
        utils.tprint('\n'.join(['nfaStr', nfaStr, '\ndfaStr', dfaStr]))
        checkEquivalent(dfaStr, nfa, alphabet, maxShortInputLen,
                        maxRandInputLen, numRandInputs, tmType='dfa')
#!/usr/bin/env python3

import sys

from nfa import Nfa


def adjust(s1, s2, s3, s4):
    print(s1.ljust(30), s2.ljust(7), s3.ljust(5), s4)


adjust('nfa', 'states', 'fin', 'trans')
for x in sys.argv[1:]:
    v = Nfa.nfa_size(x)
    adjust(*v)
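# Example invocation (assuming this script is saved as nfa_size.py; the
# input files are placeholders): prints a header row followed by one row of
# state / final-state / transition counts per NFA given on the command line.
#   ./nfa_size.py automata/sprobe.fa reduced.fa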
def simulateNfa(nfaString, inString):
    tm = Nfa(nfaString)
    tm.reset(inString)
    tmResult = tm.run()
    return tmResult
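# Hypothetical usage sketch for simulateNfa. The transition syntax matches
# the strings used in the tests in this code base (e.g. "q0->q1 : Eps");
# the exact return value of run() is determined by the underlying
# Nfa/TuringMachine machinery, so the expected output is only indicative.
# example = """
# q0->q1 : a
# q1->qA : b
# """
# print(simulateNfa(example, 'ab'))   # presumably reports acceptance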
def convertNfaToDfa(nfaString):
    nfa = Nfa(nfaString, name='sourceNfa')
    # print('source nfa is', nfa.write())
    dfa = Dfa(None, name='destDfa')

    # Dictionary of states in destDfa. Key is the name of the state
    # (e.g. q0, qA), and value is the set of states in sourceNfa to which
    # the destDfa state corresponds.
    stateToSubset = dict()
    # As above, but the key is the value and vice versa
    subsetToState = dict()

    # Find out where we can get to in the nfa without consuming any symbols.
    initialStateSet = getAllStayDests(nfa, set([TuringMachine.startState]))

    # If we can immediately reach the accept state, we are in the
    # special case of accepting all strings, so construct and return a
    # suitable dfa.
    if TuringMachine.acceptState in initialStateSet:
        t = Transition(TuringMachine.startState, TuringMachine.acceptState,
                       TuringMachine.anySym, None, TuringMachine.rightDir)
        dfa.addTransition(t)
        return dfa.write()

    # Begin the core algorithm by creating the first state in the dfa,
    # which corresponds to the subset of states in the NFA that can be
    # reached without consuming any symbols.
    stateToSubset[TuringMachine.startState] = initialStateSet
    subsetToState[initialStateSet] = TuringMachine.startState

    # Also create an accept state
    stateToSubset[TuringMachine.acceptState] = \
        frozenset([TuringMachine.acceptState])
    subsetToState[frozenset([TuringMachine.acceptState])] = \
        TuringMachine.acceptState

    # Keep track of which states in the dfa have not yet been processed
    unprocessedDfaStates = [TuringMachine.startState]

    iteration = 0  # for debugging
    while len(unprocessedDfaStates) > 0:
        # print('iteration:', iteration); iteration += 1
        dfa.unifyTransitions()
        # print(dfa.write())
        dfaState = unprocessedDfaStates.pop()
        nfaStates = stateToSubset[dfaState]
        # print('processing dfa state', dfaState,
        #       'corresponding to nfa states', nfaStates)

        # 'transitions' will store transitions from dfaState.
        # key is label, value is a set of destinations
        transitions = dict()
        for c in TuringMachine.validSymbols:
            transitions[c] = set()
        for nfaState in nfaStates:
            # print('processing nfaState', nfaState)
            transitionList = nfa.getTransitions(nfaState)
            # print('transitions are', transitionList)
            for t in transitionList:
                for c in TuringMachine.validSymbols:
                    # The special anySym symbol with the 'stay'
                    # direction represents an epsilon-transition.
                    # These will be dealt with separately. For now,
                    # don't consider these to be matches.
                    if t.label == TuringMachine.anySym and \
                       t.direction == TuringMachine.stayDir:
                        continue
                    if nfa.labelMatchesSymbol(c, t.label):
                        # print(c, 'matches label', t.label)
                        if t.destState != TuringMachine.rejectState:
                            # print('adding transition with label', c,
                            #       'and dest', t.destState)
                            transitions[c].add(t.destState)

        # Expand the destinations by following all epsilon-transitions.
        # The values will now be frozensets, which is what we need.
        for c in TuringMachine.validSymbols:
            transitions[c] = getAllStayDests(nfa, transitions[c])

        # Add any new transitions to the dfa
        for label, subset in transitions.items():
            if len(subset) == 0:
                continue
            if subset not in subsetToState:
                name = getNewStateName(stateToSubset)
                stateToSubset[name] = subset
                subsetToState[subset] = name
                unprocessedDfaStates.append(name)
            else:
                name = subsetToState[subset]
            tr = Transition(dfaState, name, label, None,
                            TuringMachine.rightDir)
            dfa.addTransition(tr)

    # not strictly necessary, but makes the representation easier to test
    dfa.unifyTransitions()
    return dfa.write()
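# A minimal standalone sketch of the epsilon-closure ("stay-destination")
# step that getAllStayDests performs above. This helper is an illustration
# over a plain dict of epsilon-edges, not the project's API; in particular,
# per testGetAllStayDests, the project's version additionally collapses the
# result to {'qA'} once the accept state becomes reachable, which this
# sketch omits.
def eps_closure(eps_edges, states):
    # eps_edges: dict mapping a state to the set of its epsilon-successors
    stack, seen = list(states), set(states)
    while stack:
        s = stack.pop()
        for t in eps_edges.get(s, ()):
            if t not in seen:
                seen.add(t)
                stack.append(t)
    return frozenset(seen)

# eps_closure({'q0': {'q1'}, 'q1': {'q2'}}, {'q0'})
# -> frozenset({'q0', 'q1', 'q2'})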
def main():
    parser = argparse.ArgumentParser(description='Approximate NFA reduction.')
    parser.add_argument('-r', '--ratio', metavar='N', type=float, default=.2,
                        help='reduction ratio')
    parser.add_argument('input', type=str, help='NFA to reduce')
    parser.add_argument('-n', '--nw', type=int,
                        default=multiprocessing.cpu_count() - 1,
                        help='number of workers to run in parallel')
    parser.add_argument('--test', nargs='+', type=str, metavar='PCAP',
                        help='test pcap files')
    parser.add_argument('--train', type=str, metavar='PCAP',
                        help='train pcap file')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-m', '--merge', action='store_true',
                       help='merging reduction')
    group.add_argument('-a', '--armc', action='store_true',
                       help='merging reduction inspired by abstract regular '
                            'model checking')
    parser.add_argument('-th', '--thresh', type=float, metavar='N',
                        help='threshold for merging', default=.995)
    parser.add_argument('-mf', '--maxfr', type=float, default=.1, metavar='N',
                        help='max frequency of a state allowed to be merged')
    parser.add_argument('-o', '--output', type=str, default='output.fa')
    args = parser.parse_args()

    if (args.merge or args.armc) and not args.train:
        raise SystemExit('--train option is required when merging')

    # get NFA
    aut = Nfa.parse(args.input)

    if args.armc:
        # merge using armc and prune
        aut, m = armc(aut, args.train, ratio=args.ratio, th=args.thresh,
                      merge_empty=False)
        sys.stderr.write('states merged: ' + str(m) + '\n')
    else:
        sys.stderr.write('reduction ratio: ' + str(args.ratio) + '\n')
        freq = aut.get_freq(args.train)
        aut, m = reduce_nfa(aut, freq, ratio=args.ratio, merge=args.merge,
                            th=args.thresh, mf=args.maxfr)
        if args.merge:
            sys.stderr.write('states merged: ' + str(m) + '\n')

    with open(args.output, 'w') as f:
        sys.stderr.write('saved as ' + args.output + '\n')
        aut.print(f)

    if args.test:
        sys.stderr.write('evaluating reduction error\n')
        r = Nfa.eval_accuracy(args.input, args.output, ' '.join(args.test),
                              nw=args.nw)
        total, fp, tp = 0, 0, 0
        for b in r.split('\n'):
            if b != '':
                _, _, s1, _, _, s2, s3 = b.split(',')
                total += int(s1)
                fp += int(s2)
                tp += int(s3)
        print('error:', round(fp / total, 4))
        if tp + fp > 0:
            print('precision:', round(tp / (fp + tp), 4))
def reduce_eval(fa_name, *, test, train=None, ratios, merge=False,
                ths=[.995], mfs=[.1], nw=1):
    '''
    Perform several approximate reductions and store results to files.

    Parameters
    ----------
    fa_name : str
        name of the file with the NFA
    test : list
        shell glob patterns matching the PCAP files used for reduction
        error evaluation
    train : str
        PCAP filename used for calculating packet frequency
    ratios : list
        reduction ratios
    merge : bool
        use merging reduction before pruning
    ths : list
        merging thresholds
    mfs : list
        maximal frequency merging parameters
    nw : int
        number of threads to run in parallel
    '''
    RED_DIR = 'experiments/nfa'
    ERR_CSV = 'experiments/eval.csv'
    RED_CSV = 'experiments/reduction.csv'

    if not merge:
        ths, mfs = [None], [None]

    test_data = ' '.join(set(item for sub in test for item in glob(sub)))
    assert len(test_data) >= 1
    assert 1 <= nw <= multiprocessing.cpu_count()
    for i in test_data.split():
        check_file(i)
    for i in ['state_frequency', 'nfa_eval']:
        check_file(i)
    check_file(RED_DIR, True)
    for i in ratios:
        assert 0.0001 < i < 0.99

    aut = Nfa.parse(fa_name)
    cname = os.path.basename(fa_name).replace('.fa', '')
    orig_name = os.path.join(RED_DIR, cname + '.msfm')
    with open(orig_name, 'w') as f:
        aut.print(f, how='msfm')
    freq = aut.get_freq(train)

    reduction_csv = []
    eval_csv = []
    for r, th, mf in itertools.product(ratios, ths, mfs):
        a, m = reduce_nfa(deepcopy(aut), freq, r, merge, th, mf)

        # save reduction data; find an unused index for the output files
        cname = os.path.basename(fa_name).replace('.fa', '')
        idx = 0
        while True:
            h = str(idx).zfill(5)
            reduced = os.path.join(RED_DIR, cname + '.' + h + '.fa')
            msfm = os.path.join(RED_DIR, cname + '.' + h + '.msfm')
            if not os.path.exists(reduced):
                break
            idx += 1

        # save reduced nfa
        a.merge_redundant_states()
        with open(reduced, 'w') as f:
            a.print(f)
        # save nfa in msfm format
        with open(msfm, 'w') as f:
            a.print(f, how='msfm')

        # store reduction result to csv
        pname = str(train)
        cname = os.path.basename(reduced).replace('.fa', '')
        if merge:
            o = ','.join(str(x) for x in
                         [cname, os.path.basename(pname), r, th, mf, m,
                          a.state_count, a.trans_count])
        else:
            o = ','.join(str(x) for x in
                         [cname, os.path.basename(pname), r, 'NA', 'NA', 0,
                          a.state_count, a.trans_count])
        reduction_csv.append(o)

        # eval error and save result
        eval_csv.append(Nfa.eval_accuracy(fa_name, reduced, test_data, nw=nw))

    with open(ERR_CSV, 'a') as f:
        for i in eval_csv:
            f.write(i)
    with open(RED_CSV, 'a') as f:
        for i in reduction_csv:
            f.write(i + '\n')
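# A hypothetical invocation sketch for reduce_eval; the automaton and pcap
# paths reuse the names from armc_vs_merge_vs_prune above, and the glob
# pattern is a placeholder.
# reduce_eval('automata/sprobe.fa',
#             test=['pcaps/*.pcap'],
#             train='pcaps/10k.pcap',
#             ratios=[.1, .2, .3],
#             merge=True, ths=[.995], mfs=[.1], nw=2)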
def main():
    parser = argparse.ArgumentParser(description='Approximate NFA reduction.')
    parser.add_argument('-r', '--ratio', metavar='N', type=float, default=.2,
                        help='reduction ratio')
    parser.add_argument('input', type=str, help='NFA to reduce')
    parser.add_argument('-n', '--nw', type=int,
                        default=multiprocessing.cpu_count() - 1,
                        help='number of workers to run in parallel')
    parser.add_argument('--test', nargs='+', type=str, metavar='PCAP',
                        help='test pcap files')
    parser.add_argument('--train', type=str, metavar='PCAP',
                        help='train pcap file')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-m', '--merge', action='store_true',
                       help='merging reduction')
    group.add_argument('-a', '--armc', action='store_true',
                       help='merging reduction inspired by abstract regular '
                            'model checking')
    parser.add_argument('-fp', '--freq_pruning', action='store_true',
                        help='frequency-based pruning reduction (not tested '
                             'in combination with -m merge)')
    parser.add_argument('-th', '--thresh', type=float, metavar='N',
                        help='threshold for merging', default=.995)
    parser.add_argument('-mf', '--maxfr', type=float, default=.1, metavar='N',
                        help='max frequency of a state allowed to be merged')
    parser.add_argument('-o', '--output', type=str, default='output.fa')
    args = parser.parse_args()

    if (args.merge or args.armc) and not args.train:
        raise SystemExit('--train option is required when merging')

    # Save results for later automatic testing.
    prune = "fp" if args.freq_pruning else "p"
    os.makedirs("results", exist_ok=True)
    results_file = (f"results/{os.path.basename(args.train)}_"
                    f"{os.path.basename(args.input)}_{prune}_{args.ratio}.txt")
    if os.path.exists(results_file):
        print(results_file, "already exists.")
        sys.exit()

    # Parse the input NFA.
    aut = Nfa.parse(args.input)

    max_err = -1
    if args.armc:
        # ARMC-style merging (merges states with similar prefixes) followed
        # by pruning; returns the reduced automaton and the number of
        # states merged.
        aut, m = armc(aut, args.train, ratio=args.ratio, th=args.thresh,
                      merge_empty=False)
        sys.stderr.write('states merged: ' + str(m) + '\n')
    else:
        # Without -m only pruning is performed (inside reduce_nfa, via
        # merge=args.merge); the ratio defaults to .2.
        sys.stderr.write('reduction ratio: ' + str(args.ratio) + '\n')
        # get_freq (Nfa class, nfa.py) counts how many times each state is
        # visited on the training traffic; returns a dict state -> count.
        freq = aut.get_freq(args.train, freq_file=False)
        # Frequency-driven reduction (merging and/or pruning).
        aut, m, max_err = reduce_nfa(aut, freq, ratio=args.ratio,
                                     merge=args.merge,
                                     freq_pruning=args.freq_pruning,
                                     th=args.thresh, mf=args.maxfr)
        if args.merge:
            sys.stderr.write('states merged: ' + str(m) + '\n')

    # Write the reduced NFA to the output file.
    with open(args.output, 'w') as f:
        sys.stderr.write('saved as ' + args.output + '\n')
        aut.print(f)

    # Compute the reduction error.
    if args.test:
        sys.stderr.write('evaluating reduction error\n')
        # eval_accuracy (Nfa class, nfa.py) calls the external C++ tool
        # nfa_eval and returns comma-separated values, one line per worker.
        r = Nfa.eval_accuracy(args.input, args.output, ' '.join(args.test),
                              nw=args.nw)
        total, fp, tp = 0, 0, 0
        for b in r.split('\n'):
            # Each non-empty line holds seven comma-separated values; only
            # the totals, false positives and true positives are used.
            if b != '':
                _, _, s1, _, _, s2, s3 = b.split(',')
                total += int(s1)
                fp += int(s2)
                tp += int(s3)
        real_err = round(fp / total, 4)
        print('real error:', real_err)

        estim_err = -1
        if max_err != -1:
            # Normalize by the sum of frequencies of all final states of
            # the original automaton.
            estim_err = round(max_err / total, 4)
            print('estimated error of freq pruning', estim_err)

        precis = -1
        if tp + fp > 0:
            precis = round(tp / (fp + tp), 4)
            print('precision:', precis)

        with open(results_file, 'w') as fptr:
            fptr.write("#real_error, precision, estimated_error\n")
            fptr.write(f"{real_err},{precis},{estim_err}\n")
        print(results_file, "saved.")
import json

from nfa import Nfa
from dfa import Dfa

if __name__ == "__main__":
    with open("json/dfa.json", "r") as f:
        data = json.load(f)
    con = Dfa(data)
    c = con.minimize()
    print(c.minimize())
    print(c.read("aab"))
    print(con.read("aab"))

    with open("json/nfa.json", "r") as f:
        data = json.load(f)
    con = Nfa(data)
    h = con.determine()
    print(h)
    print(con.read("ab"))
    a = h.dot_dictionary("hola")
    print(a)