Esempio n. 1
0
class Fa(InterfaceFa):
    """Facade that forwards every operation to a concrete automaton.

    The concrete implementation (``Dfa`` or ``Nfa``) is selected from the
    ``"deterministic"`` entry of the automaton description and stored in
    ``self.heritage``.
    """

    def __init__(self, automaton):
        """Wrap ``automaton`` in the matching implementation.

        automaton: subscriptable description; only its ``"deterministic"``
            entry is read here, the whole object is handed to ``Dfa``/``Nfa``.
        Raises whatever the lookup of ``"deterministic"`` raises (e.g.
        ``KeyError``) when the entry is missing.
        """
        self.__automaton = automaton
        # A truthy flag selects the deterministic implementation, anything
        # else the non-deterministic one.  The former third branch
        # (``raise TypeError``) could never be reached and was removed.
        if self.automaton["deterministic"]:
            self.heritage = Dfa(automaton)
        else:
            self.heritage = Nfa(automaton)

    def read(self, word):
        """Return the result of the wrapped automaton's ``read(word)``."""
        return self.heritage.read(word)

    @property
    def automaton(self):
        """The description object this instance was built from."""
        return self.__automaton

    @property
    def states(self):
        """``states`` of the wrapped automaton."""
        return self.heritage.states

    @property
    def alphabet(self):
        """``alphabet`` of the wrapped automaton."""
        return self.heritage.alphabet

    @property
    def dictionary(self):
        """``dictionary`` of the wrapped automaton."""
        return self.heritage.dictionary

    def __repr__(self):
        """Return a string form of the wrapped automaton's ``dictionary``.

        ``__repr__`` must return a ``str``; a non-string ``dictionary`` is
        therefore converted with ``repr`` instead of being returned as is
        (which made ``repr(fa)`` raise ``TypeError``).
        """
        dictionary = self.heritage.dictionary
        if isinstance(dictionary, str):
            return dictionary
        return repr(dictionary)

    def __str__(self):
        """Return the string form of the wrapped automaton."""
        return str(self.heritage)
Esempio n. 2
0
def eval_nfa(target, fname, test, train, method, args):
    """Apply ``method`` to the automaton in ``target`` and report its error.

    The automaton parsed from ``target`` is passed to ``method`` together
    with ``train`` and the keyword arguments in ``args``; it is then written
    to ``fname`` and evaluated against ``target`` on ``test``.  One line
    ``fname,error,second_result_of_method`` is printed, where ``error`` is
    field 6 divided by field 3 of the comma-separated evaluation record
    (named ``fp`` and ``total`` here), rounded to 4 places.
    """
    automaton = Nfa.parse(target)
    # Only the second item returned by the method is reported; the parsed
    # automaton object itself is what gets saved.
    _, method_info = method(automaton, train, **args)

    with open(fname, 'w') as out:
        automaton.print(out)

    record = Nfa.eval_accuracy(target, fname, test)
    # The record must consist of exactly seven comma-separated fields.
    _, _, total, _, _, false_pos, _ = record.split(',')
    error = round(int(false_pos) / int(total), 4)
    print(fname, error, method_info, sep=',')
Esempio n. 3
0
def main():
    """Command-line entry point: reduce an NFA or check language inclusion.

    With ``-s/--sub xNFA`` both automata are written in 'ba' format to
    temporary files and the RABIT jar is run on them.  Otherwise the input
    automaton is written in 'ba' format, the Reduce jar is run on it and the
    reduced automaton is written to ``--output``.

    Exits with status 1 when the required jar cannot be found or when the
    output name is empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        metavar='FILE',
                        default="automaton.fa",
                        help='output file')

    parser.add_argument('input', metavar='NFA', type=str)

    parser.add_argument('-s',
                        '--sub',
                        type=str,
                        metavar='xNFA',
                        help='check whether L(NFA) is a subset of L(xNFA)')

    args = parser.parse_args()

    if args.sub:
        jarfile = search_for_file('RABIT.jar')
        if jarfile is None:
            sys.stderr.write(
                'Error: cannot find RABIT tool in this directory\n')
            sys.exit(1)
        aut1 = Nfa.parse(args.input)
        aut2 = Nfa.parse(args.sub)
        aut1.selfloop_to_finals()
        aut2.selfloop_to_finals()
        # The temporary files live until these objects are released.
        aut1_ba = tempfile.NamedTemporaryFile()
        aut2_ba = tempfile.NamedTemporaryFile()
        # Close the output handles explicitly so the data is flushed to disk
        # before the external tool reads the files.
        with open(aut1_ba.name, 'w') as out:
            aut1.print(out, how='ba')
        with open(aut2_ba.name, 'w') as out:
            aut2.print(out, how='ba')

        # An argument list (instead of splitting a command string) keeps
        # paths containing whitespace intact.
        subprocess.call(['java', '-jar', jarfile, aut1_ba.name,
                         aut2_ba.name, '-fast', '-finite'])
    else:
        jarfile = search_for_file('Reduce.jar')
        if jarfile is None:
            sys.stderr.write(
                'Error: cannot find Reduce tool in this directory\n')
            sys.exit(1)
        if not args.output:
            sys.stderr.write('Error: no output specified\n')
            sys.exit(1)
        ba_file = tempfile.NamedTemporaryFile()
        reduce_file = tempfile.NamedTemporaryFile()

        aut = Nfa.parse(args.input, 'fa')
        aut.extend_final_states()
        write_output(ba_file.name, aut.write(how='ba'))

        subprocess.call(['java', '-jar', jarfile, ba_file.name, '10',
                         '-sat', '-finite', '-o', reduce_file.name])
        # Read the tool's result back and undo the final-state extension.
        aut = Nfa.parse(reduce_file.name, 'ba')
        aut.retrieve_final_states()
        write_output(args.output, aut.write())
Esempio n. 4
0
 def __init__(self, automaton):
     """Store ``automaton`` and build the matching concrete automaton.

     A truthy ``automaton["deterministic"]`` selects ``Dfa``, anything else
     selects ``Nfa``; the chosen object is kept in ``self.heritage``.
     """
     self.__automaton = automaton
     # The former ``else: raise TypeError`` branch was unreachable (a value
     # is either truthy or not), so a plain if/else is sufficient.
     if self.automaton["deterministic"]:
         self.heritage = Dfa(automaton)
     else:
         self.heritage = Nfa(automaton)
Esempio n. 5
0
def timbuk2fa(fname):
    """Build an ``Nfa`` from the Timbuk-format text file ``fname``.

    State names are mapped to consecutive integers in order of first
    appearance.  The parser recognises:

    * a line starting with ``Final States``: every token after the first
      two is registered as a final state;
    * the line right after a line starting with ``Transitions``: its last
      token is registered as the initial state;
    * every following non-empty line: after replacing quotes, parentheses,
      ``-`` and ``>`` by blanks it must consist of exactly three tokens
      ``a p q``; ``a`` is converted with ``int`` and passed to
      ``_add_rule(p, q, a)`` (presumably source, destination, symbol --
      confirm against ``Nfa._add_rule``).

    Returns the populated ``Nfa``.  Raises ``ValueError`` when a rule line
    does not have three tokens or its symbol is not an integer.
    """
    nfa = Nfa()
    state_ids = {}

    def add_state(name):
        # len() is evaluated before insertion, so new names receive the
        # ids 0, 1, 2, ... in order of first appearance.
        return state_ids.setdefault(name, len(state_ids))

    # Punctuation that only decorates a transition line.
    punctuation = re.compile(r'["\(\)\->]')

    # Parser phase: 0 = header, 1 = line following "Transitions",
    # 2 = transition rules.
    phase = 0
    with open(fname, 'r') as f:
        for line in f:
            # Strip only the newline: slicing off the last character would
            # corrupt a final line that has no trailing newline.
            line = line.rstrip('\n')
            if line == "":
                continue
            if line.startswith("Final States"):
                for state in line.split()[2:]:
                    nfa._add_final_state(add_state(state))

            if phase == 1:
                init = line.split()[-1]
                nfa._add_initial_state(add_state(init))
                phase = 2
            elif phase == 2:
                a, p, q = punctuation.sub(' ', line).split()
                p = add_state(p)
                q = add_state(q)
                nfa._add_rule(p, q, int(a))

            # Checked last so that the "Transitions" line itself is not
            # parsed as the initial-state line.
            if line.startswith("Transitions"):
                phase = 1
    return nfa
Esempio n. 6
0
def main():
    """Minimize the NFA given as ``input`` and save it as ``output``.

    Pipeline: parse the FA file, extend its final states, convert it to
    timbuk format, minimize it with ``sbl``, convert the result back and
    retrieve the final states.  Progress messages go to stderr.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('input', metavar='NFA', type=str)
    cli.add_argument('output', metavar='OUT', type=str)
    args = cli.parse_args()

    # Scratch files for the timbuk form and for the minimized automaton.
    tb_file = tempfile.NamedTemporaryFile()
    min_file = tempfile.NamedTemporaryFile()
    progress = sys.stderr.write

    progress('Parsing FA file\n')
    automaton = Nfa.parse(args.input)

    progress('Extending final states\n')
    symbol = automaton.extend_final_states()

    progress('Converting to timbuk format\n')
    fa2timbuk(automaton, symbol, tb_file.name)
    # NOTE(review): a second copy is also written to 'tmp2' in the current
    # directory -- looks like a debugging leftover, kept to preserve behavior.
    fa2timbuk(automaton, symbol, 'tmp2')

    progress('Parsing timbuk file\n')
    timbuk_automaton = sbl.parse(tb_file.name)

    progress('Minimizing\n')
    minimized = timbuk_automaton.minimize()
    minimized.print_automaton(min_file.name)

    progress('Converting to FA\n')
    automaton = timbuk2fa(min_file.name)

    progress('Retrieving final states\n')
    automaton.retrieve_final_states()

    write_output(args.output, automaton.write())
    progress('Saved as ' + args.output + '\n')
Esempio n. 7
0
def main():
    """Merge redundant states of the NFA in argv[1] and save it to argv[2].

    Raises ``SystemError`` when fewer than two command-line arguments are
    given.
    """
    if not len(sys.argv) >= 3:
        raise SystemError('two arguments required: INPUT OUTPUT')

    source, destination = sys.argv[1:3]
    automaton = Nfa.parse(source)
    automaton.merge_redundant_states()
    with open(destination, 'w') as out:
        automaton.print(out)
Esempio n. 8
0
def main():
    """Render an NFA as a Graphviz image and open it.

    The automaton named by ``input`` is written in dot format to
    ``--output``; ``dot`` then renders a PNG next to it and ``xdg-open``
    displays the image.  ``--rules`` and ``--depth`` restrict the set of
    displayed states, ``--freq`` supplies frequency data loaded with
    ``get_freq`` and ``--trans`` turns on transition labels.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        metavar='FILE',
                        default="Aut.dot",
                        help='output file')

    parser.add_argument('input', metavar='NFA', type=str)
    parser.add_argument('-f', '--freq', type=str, help='heat map')
    parser.add_argument('-t',
                        '--trans',
                        action='store_true',
                        help='show transition labels')
    parser.add_argument('-r',
                        '--rules',
                        type=int,
                        help='number of rules to show')
    parser.add_argument('-d',
                        '--depth',
                        type=int,
                        help='maximal depth of a state to display')

    parser.add_argument("--dpi",
                        type=int,
                        default=3000,
                        help="output image dpi")
    args = parser.parse_args()

    aut = Nfa.parse(args.input)

    freq = None
    if args.freq:
        freq = get_freq(args.freq)

    states = set(aut.states)
    if args.rules:
        # Keep only the states occurring in the first N value lists
        # returned by fin_pred().
        rules = list(aut.fin_pred().values())[0:args.rules]
        states = {s for subl in rules for s in subl}

    if args.depth:
        depth = aut.state_depth
        states = {s for s in states if depth[s] < args.depth}

    out = aut.write_dot(show_trans=args.trans,
                        freq=freq,
                        states=states,
                        freq_scale=lambda x: math.log(x + 2),
                        show_diff=0)

    write_output(args.output, out)
    image = args.output.split('.dot')[0] + '.png'
    # Argument lists (instead of splitting a command string) keep file
    # names containing whitespace intact.
    subprocess.call(['dot', '-Tpng', args.output,
                     '-Gdpi=' + str(args.dpi), '-o', image])
    subprocess.call(['xdg-open', image])
def testGetAllStayDests():
    """Check ``getAllStayDests`` on hand-written and file-based NFAs.

    Each case gives an automaton, a set of starting states and the exact
    frozenset ``getAllStayDests`` is expected to return.
    """
    epsTreeSpec = """
q0->q1 : Eps
q1->q2 : Eps
q2->q3 : Eps
q3->q4 : Eps
q2->q5 : Eps
q2->q6 : Eps
"""
    epsMultiChainSpec = """
q0->q1 : Eps
q1->q2 : Eps
q3->q4 : Eps
q5->q6 : Eps
"""

    epsCycleSpec = """
q0->q1 : Eps
q1->q2 : Eps
q2->q3 : Eps
q3->q4 : Eps
q4->q0 : Eps
"""

    acceptChainSpec = """
q0->q1 : Eps
q1->q2 : Eps
q2->qA : Eps
"""

    # Construction order is kept as in the original test.
    tree = Nfa(epsTreeSpec)
    cycle = Nfa(epsCycleSpec)
    accepting = Nfa(acceptChainSpec)
    multi = Nfa(epsMultiChainSpec)
    fromFile1 = Nfa(rf('example2.nfa'))
    fromFile2 = Nfa(rf('mult2or3Gs.nfa'))

    cases = [
        (fromFile1, {'q0'}, frozenset({'q0'})),
        (fromFile1, {'q1'}, frozenset({'q1', 'q2'})),
        (fromFile2, {'q0'}, frozenset({'q0', 'q1', 'q3'})),
        (tree, {'q0'},
         frozenset({'q0', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6'})),
        (tree, {'q1'},
         frozenset({'q1', 'q2', 'q3', 'q4', 'q5', 'q6'})),
        (cycle, {'q0'}, frozenset({'q0', 'q1', 'q2', 'q3', 'q4'})),
        (cycle, {'q1'}, frozenset({'q0', 'q1', 'q2', 'q3', 'q4'})),
        (accepting, {'q0'}, frozenset({'qA'})),
        (multi, {'q0'}, frozenset({'q0', 'q1', 'q2'})),
        (multi, {'q2'}, frozenset({'q2'})),
        (multi, {'q1', 'q5'}, frozenset({'q1', 'q2', 'q5', 'q6'})),
        (multi, {'q0', 'q3', 'q5'},
         frozenset({'q0', 'q1', 'q2', 'q3', 'q4', 'q5', 'q6'})),
    ]
    for automaton, startStates, expected in cases:
        actual = getAllStayDests(automaton, startStates)
        utils.tprint(startStates, ':', actual)
        assert actual == expected
Esempio n. 10
0
def armc_vs_merge_vs_prune():
    """Run the armc / merge / prune comparison experiment.

    Every configuration is evaluated with ``eval_nfa`` on the same target
    automaton, training capture and test capture; the reduced automata are
    written below ``experiments/armc``.
    """
    ratio = .2
    target = 'automata/sprobe.fa'
    train = 'pcaps/10k.pcap'
    test = 'pcaps/40k.pcap'
    freq = Nfa.parse(target).get_freq(train)
    res_dir = 'experiments/armc'

    # (output file, threshold, prune_empty) for the armc runs.
    armc_runs = [
        ('armc_bare_th7.fa', .7, 1),
        ('armc_prune_th7.fa', .7, 0),
        # Was .7, which made this run a duplicate of the th7 run above;
        # the file name series th7/th5/th1 implies a threshold of .5.
        ('armc_prune_th5.fa', .5, 0),
        ('armc_prune_th1.fa', .1, 0),
    ]
    for fname, th, prune_empty in armc_runs:
        eval_nfa(target, os.path.join(res_dir, fname), test, train, armc, {
            'ratio': ratio,
            'th': th,
            'prune_empty': prune_empty
        })

    # The frequency-based reductions receive the precomputed frequencies
    # in place of the training capture.
    for fname, merge in (('prune.fa', 0), ('merge.fa', 1)):
        eval_nfa(target, os.path.join(res_dir, fname), test, freq,
                 reduce_nfa, {
                     'ratio': ratio,
                     'merge': merge
                 })
Esempio n. 11
0
def testConvertNfatoDfa():
    """Check that ``convertNfaToDfa`` yields DFAs equivalent to their NFAs.

    For each listed NFA file the converted DFA is compared with the source
    NFA via ``checkEquivalent``.  When ``utils.BRIEF_TESTS`` is set, only
    the first ``NUM_BRIEF_TESTS`` cases are run.
    """
    # Each case is:
    # (nfaFile, alphabet, maxShortInputLen, maxRandInputLen, numRandInputs)
    cases = [
        ('chainWithAccept.nfa', 'abc', 4, 20, 100),
        ('CGstar.nfa', 'CAGT', 5, 20, 100),
        ('simple1.nfa', 'CAGT', 6, 12, 1000),
        ('simple2.nfa', 'CAGT', 6, 12, 10000),
        ('simple3.nfa', 'CAGT', 6, 12, 10000),
        ('mult2or3Gs.nfa', 'G', 10, 50, 1000),
        ('example2.nfa', 'CAGT', 9, 20, 10000),
    ]
    for caseNumber, case in enumerate(cases, start=1):
        if utils.BRIEF_TESTS and caseNumber > NUM_BRIEF_TESTS:
            break
        nfaFile, alphabet, shortLen, randLen, randCount = case
        nfaStr = rf(nfaFile)
        nfa = Nfa(nfaStr)
        dfaStr = convertNfaToDfa(nfaStr)
        report = '\n'.join(['nfaStr', nfaStr, '\ndfaStr', dfaStr])
        utils.tprint(report)
        checkEquivalent(dfaStr, nfa, alphabet, shortLen, randLen,
                        randCount, tmType = 'dfa')
Esempio n. 12
0
#!/usr/bin/env python3

import sys
from nfa import Nfa


def adjust(s1, s2, s3, s4):
    """Print one table row: three left-justified columns plus a last one.

    ``s1``, ``s2`` and ``s3`` are padded with ``ljust`` to 30, 7 and 5
    characters respectively; ``s4`` is printed unpadded.
    """
    widths = (30, 7, 5)
    padded = [cell.ljust(width) for cell, width in zip((s1, s2, s3), widths)]
    print(*padded, s4)


# Header row, followed by one row per automaton file named on the command
# line; Nfa.nfa_size must yield the four column values for a file.
adjust('nfa', 'states', 'fin', 'trans')

for path in sys.argv[1:]:
    adjust(*Nfa.nfa_size(path))
Esempio n. 13
0
def simulateNfa(nfaString, inString):
    """Run the NFA built from ``nfaString`` on ``inString``.

    Returns whatever the machine's ``run()`` returns after it has been
    reset with the input.
    """
    machine = Nfa(nfaString)
    machine.reset(inString)
    return machine.run()
Esempio n. 14
0
def convertNfaToDfa(nfaString):
    """Convert the NFA described by ``nfaString`` into a DFA description.

    Every DFA state stands for a set of NFA states.  Starting from the set
    reachable from the start state without consuming input, the destination
    set for each valid symbol is computed, given a DFA state name on first
    sight, and queued for processing until no unprocessed state remains.

    Returns the result of ``write()`` on the constructed ``Dfa``.
    """
    nfa = Nfa(nfaString, name = 'sourceNfa')
    dfa = Dfa(None, name = 'destDfa')

    start = TuringMachine.startState
    accept = TuringMachine.acceptState
    symbols = TuringMachine.validSymbols

    # NFA states reachable from the start state without consuming a symbol.
    startSubset = getAllStayDests(nfa, set([start]))

    # Special case: the accept state is reachable immediately, so every
    # string is accepted and a single transition suffices.
    if accept in startSubset:
        acceptAll = Transition(start, accept, TuringMachine.anySym, None,
                               TuringMachine.rightDir)
        dfa.addTransition(acceptAll)
        return dfa.write()

    # Two-way mapping between DFA state names and the NFA subsets they
    # represent, seeded with the start state and a dedicated accept state.
    acceptSubset = frozenset([accept])
    stateToSubset = {start: startSubset, accept: acceptSubset}
    subsetToState = {startSubset: start, acceptSubset: accept}

    # DFA states whose outgoing transitions still have to be computed.
    pending = [start]
    while pending:
        dfa.unifyTransitions()
        dfaState = pending.pop()

        # For each symbol, the NFA states directly reachable from any
        # member of the current subset.
        reachable = {symbol: set() for symbol in symbols}
        for nfaState in stateToSubset[dfaState]:
            for t in nfa.getTransitions(nfaState):
                # anySym combined with the 'stay' direction denotes an
                # epsilon-transition; those are followed separately below.
                isEpsilon = (t.label == TuringMachine.anySym
                             and t.direction == TuringMachine.stayDir)
                if isEpsilon:
                    continue
                for symbol in symbols:
                    if (nfa.labelMatchesSymbol(symbol, t.label)
                            and t.destState != TuringMachine.rejectState):
                        reachable[symbol].add(t.destState)

        # Follow all epsilon-transitions; the results are used as
        # dictionary keys below.
        closures = {symbol: getAllStayDests(nfa, reachable[symbol])
                    for symbol in symbols}

        # Emit one DFA transition per symbol with a non-empty destination,
        # creating and queueing DFA states for subsets not seen before.
        for label, subset in closures.items():
            if not subset:
                continue
            if subset in subsetToState:
                name = subsetToState[subset]
            else:
                name = getNewStateName(stateToSubset)
                stateToSubset[name] = subset
                subsetToState[subset] = name
                pending.append(name)
            dfa.addTransition(
                Transition(dfaState, name, label, None,
                           TuringMachine.rightDir))

    # Not strictly necessary, but makes the representation easier to test.
    dfa.unifyTransitions()

    return dfa.write()
Esempio n. 15
0
def main():
    """Command-line entry point for approximate NFA reduction.

    Reduces the input automaton either with ``armc`` (``-a``) or with
    ``reduce_nfa`` (pruning, optionally preceded by merging with ``-m``),
    writes the result to ``--output`` and, when ``--test`` is given,
    prints the reduction error and precision measured by
    ``Nfa.eval_accuracy``.

    Raises ``SystemError`` when merging is requested without ``--train``.
    Exits with an error message when the evaluation yields no data.
    """
    parser = argparse.ArgumentParser(description='Approximate NFA reduction.')
    parser.add_argument('-r',
                        '--ratio',
                        metavar='N',
                        type=float,
                        default=.2,
                        help='reduction ratio')
    parser.add_argument('input', type=str, help='NFA to reduce')
    # At least one worker, also on a single-CPU machine where
    # cpu_count() - 1 would be 0.
    parser.add_argument('-n',
                        '--nw',
                        type=int,
                        default=max(1, multiprocessing.cpu_count() - 1),
                        help='number of workers to run in parallel')
    parser.add_argument('--test',
                        nargs='+',
                        type=str,
                        metavar='PCAP',
                        help='test pcap files')
    parser.add_argument('--train',
                        type=str,
                        metavar='PCAP',
                        help='train pcap file')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-m',
                       '--merge',
                       action='store_true',
                       help='merging reduction')
    group.add_argument(
        '-a',
        '--armc',
        action='store_true',
        help='merging reduction inspired by abstract regular model checking')

    parser.add_argument('-th',
                        '--thresh',
                        type=float,
                        metavar='N',
                        help='threshold for merging',
                        default=.995)
    parser.add_argument('-mf',
                        '--maxfr',
                        type=float,
                        default=.1,
                        metavar='N',
                        help='max frequency of a state allowed to be merged')
    parser.add_argument('-o', '--output', type=str, default='output.fa')
    args = parser.parse_args()

    if (args.merge or args.armc) and not args.train:
        raise SystemError('--train option is required when merging')

    # get NFA
    aut = Nfa.parse(args.input)

    if args.armc:
        # merge using armc and prune
        aut, m = armc(aut,
                      args.train,
                      ratio=args.ratio,
                      th=args.thresh,
                      merge_empty=False)
        sys.stderr.write('states merged: ' + str(m) + '\n')
    else:
        sys.stderr.write('reduction ratio: ' + str(args.ratio) + '\n')
        freq = aut.get_freq(args.train)
        aut, m = reduce_nfa(aut,
                            freq,
                            ratio=args.ratio,
                            merge=args.merge,
                            th=args.thresh,
                            mf=args.maxfr)
        if args.merge:
            sys.stderr.write('states merged: ' + str(m) + '\n')

    with open(args.output, 'w') as f:
        sys.stderr.write('saved as ' + args.output + '\n')
        aut.print(f)

    if args.test:
        sys.stderr.write('evaluation reduction error\n')
        r = Nfa.eval_accuracy(args.input,
                              args.output,
                              ' '.join(args.test),
                              nw=args.nw)
        # Each non-empty line is a record of seven comma-separated fields;
        # fields 3, 6 and 7 are accumulated as total, fp and tp.
        total, fp, tp = 0, 0, 0
        for b in r.split('\n'):
            if b != '':
                _, _, s1, _, _, s2, s3 = b.split(',')
                total += int(s1)
                fp += int(s2)
                tp += int(s3)

        if total == 0:
            # Without any evaluated data the error ratio is undefined;
            # fail with a message instead of a ZeroDivisionError.
            sys.exit('error: evaluation produced no data, '
                     'cannot compute the reduction error')

        print('error:', round(fp / total, 4))
        if tp + fp > 0:
            print('precision:', round(tp / (fp + tp), 4))
Esempio n. 16
0
def reduce_eval(fa_name, *, test, train=None, ratios, merge=False,
    ths=(.995,), mfs=(.1,), nw=1):
    '''
    Perform several approximate reductions and store results to files.

    One reduction is performed for every combination of ``ratios``, ``ths``
    and ``mfs``.  Each reduced automaton is saved in FA and msfm format
    under ``experiments/nfa`` using the first unused five-digit index; one
    row per reduction is appended to ``experiments/reduction.csv`` and the
    evaluation output to ``experiments/eval.csv``.

    Parameters
    ----------
    fa_name : str
        name of the file with the NFA
    test : list
        shell glob patterns matching the PCAP files used for reduction
        error evaluation
    train :
        PCAP filename used for calculating packet frequency
    ratios : list
        reduction ratios, each strictly between 0.0001 and 0.99
    merge :
        use merging reduction before pruning
    ths : sequence
        merging thresholds (ignored unless ``merge`` is set)
    mfs : sequence
        maximal frequency merging parameters (ignored unless ``merge`` is
        set)
    nw : int
        number of threads to run in parallel

    Raises
    ------
    AssertionError
        when no test file matches, ``nw`` is outside
        ``1..cpu_count()`` or a ratio is out of range
    '''

    RED_DIR = 'experiments/nfa'
    ERR_CSV = 'experiments/eval.csv'
    RED_CSV = 'experiments/reduction.csv'

    if not merge:
        ths, mfs = [None], [None]

    test_data = ' '.join(set([item for sub in test for item in glob(sub)]))

    assert len(test_data) >= 1
    assert 1 <= nw <= multiprocessing.cpu_count()
    for i in test_data.split(): check_file(i)
    for i in ['state_frequency', 'nfa_eval']: check_file(i)
    check_file(RED_DIR, True)
    for i in ratios: assert 0.0001 < i < 0.99

    aut = Nfa.parse(fa_name)

    # Base name shared by every file produced for this automaton.
    base_name = os.path.basename(fa_name).replace('.fa','')
    orig_name = os.path.join(RED_DIR, base_name + '.msfm')
    with open(orig_name,'w') as f: aut.print(f,how='msfm')

    freq = aut.get_freq(train)
    train_name = os.path.basename(str(train))
    reduction_csv = []
    eval_csv = []

    for r, th, mf in itertools.product(ratios, ths, mfs):
        a, m = reduce_nfa(deepcopy(aut), freq, r, merge, th, mf)

        # pick the first index whose reduced file does not exist yet
        for idx in itertools.count():
            h = str(idx).zfill(5)
            reduced = os.path.join(RED_DIR, base_name + '.' + h + '.fa')
            msfm = os.path.join(RED_DIR, base_name + '.' + h + '.msfm')
            if not os.path.exists(reduced): break

        # save reduced nfa
        a.merge_redundant_states()
        with open(reduced,'w') as f: a.print(f)
        # save nfa in msfm format
        with open(msfm,'w') as f: a.print(f,how='msfm')

        # store reduction result to csv
        reduced_name = os.path.basename(reduced).replace('.fa','')
        if merge:
            row = [reduced_name, train_name, r, th, mf, m, a.state_count,
                a.trans_count]
        else:
            row = [reduced_name, train_name, r, 'NA', 'NA', 0,
                a.state_count, a.trans_count]
        reduction_csv.append(','.join([str(x) for x in row]))

        # eval error and save result
        eval_csv.append(Nfa.eval_accuracy(fa_name, reduced, test_data, nw=nw))

    with open(ERR_CSV, 'a') as f:
        f.writelines(eval_csv)
    with open(RED_CSV, 'a') as f:
        f.writelines(row + '\n' for row in reduction_csv)
Esempio n. 17
0
def main():
    """Command-line entry point for approximate NFA reduction with results.

    Reduces the input automaton either with ``armc`` (``-a``) or with
    ``reduce_nfa`` (pruning, frequency-based pruning with ``-fp``,
    optionally preceded by merging with ``-m``) and writes it to
    ``--output``.  When ``--test`` is given, the real error, the estimated
    error and the precision are printed and stored in a file under
    ``results/``; the run is skipped when that file already exists.

    Values that cannot be computed are stored as ``-1``.

    Raises ``SystemError`` when merging is requested without ``--train``.
    Exits with an error message when the evaluation yields no data.
    """
    parser = argparse.ArgumentParser(description='Approximate NFA reduction.')
    parser.add_argument('-r',
                        '--ratio',
                        metavar='N',
                        type=float,
                        default=.2,
                        help='reduction ratio')
    parser.add_argument('input', type=str, help='NFA to reduce')
    # At least one worker, also on a single-CPU machine where
    # cpu_count() - 1 would be 0.
    parser.add_argument('-n',
                        '--nw',
                        type=int,
                        default=max(1, multiprocessing.cpu_count() - 1),
                        help='number of workers to run in parallel')
    parser.add_argument('--test',
                        nargs='+',
                        type=str,
                        metavar='PCAP',
                        help='test pcap files')
    parser.add_argument('--train',
                        type=str,
                        metavar='PCAP',
                        help='train pcap file')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-m',
                       '--merge',
                       action='store_true',
                       help='merging reduction')
    group.add_argument(
        '-a',
        '--armc',
        action='store_true',
        help='merging reduction inspired by abstract regular model checking')

    parser.add_argument(
        '-fp',
        '--freq_pruning',
        action='store_true',
        help=
        'frequency based pruning reduction (not tested in combination with -m merge)'
    )
    parser.add_argument('-th',
                        '--thresh',
                        type=float,
                        metavar='N',
                        help='threshold for merging',
                        default=.995)
    parser.add_argument('-mf',
                        '--maxfr',
                        type=float,
                        default=.1,
                        metavar='N',
                        help='max frequency of a state allowed to be merged')
    parser.add_argument('-o', '--output', type=str, default='output.fa')
    args = parser.parse_args()

    if (args.merge or args.armc) and not args.train:
        raise SystemError('--train option is required when merging')

    # Results are saved for later automatic testing; the file name encodes
    # the train capture, the input automaton, the pruning kind and the ratio.
    prune = "p"
    if args.freq_pruning:
        prune = "fp"

    os.makedirs("results", exist_ok=True)

    # NOTE(review): os.path.basename(args.train) raises TypeError when
    # --train is omitted (allowed for plain pruning) -- confirm whether
    # --train should be mandatory.
    results_file = f"results/{os.path.basename(args.train)}_{os.path.basename(args.input)}_{prune}_{args.ratio}.txt"

    if os.path.exists(results_file):
        print(results_file, "already exists.")
        sys.exit()

    aut = Nfa.parse(args.input)

    # -1 marks "no estimate available".  It must be defined on the armc
    # path too, otherwise the evaluation below fails with a NameError.
    max_err = -1

    if args.armc:
        # merge using armc and prune; m is reported as the number of
        # merged states
        aut, m = armc(aut,
                      args.train,
                      ratio=args.ratio,
                      th=args.thresh,
                      merge_empty=False)
        sys.stderr.write('states merged: ' + str(m) + '\n')
    else:
        # Without -m only pruning is requested (merge=args.merge).
        sys.stderr.write('reduction ratio: ' + str(args.ratio) + '\n')
        # TODO (from the original author): switch freq_file back to True.
        freq = aut.get_freq(args.train, freq_file=False)

        aut, m, max_err = reduce_nfa(aut,
                                     freq,
                                     ratio=args.ratio,
                                     merge=args.merge,
                                     freq_pruning=args.freq_pruning,
                                     th=args.thresh,
                                     mf=args.maxfr)
        if args.merge:
            sys.stderr.write('states merged: ' + str(m) + '\n')

    # write the reduced NFA
    with open(args.output, 'w') as f:
        sys.stderr.write('saved as ' + args.output + '\n')
        aut.print(f)

    # compute the reduction error
    if args.test:
        sys.stderr.write('evaluation reduction error\n')
        r = Nfa.eval_accuracy(args.input,
                              args.output,
                              ' '.join(args.test),
                              nw=args.nw)
        # Each non-empty line is a record of seven comma-separated fields;
        # only fields 3, 6 and 7 are accumulated (total, fp, tp).
        total, fp, tp = 0, 0, 0
        for b in r.split('\n'):
            if b != '':
                _, _, s1, _, _, s2, s3 = b.split(',')
                total += int(s1)
                fp += int(s2)
                tp += int(s3)

        if total == 0:
            # Without any evaluated data the error ratios are undefined;
            # fail with a message instead of a ZeroDivisionError.
            sys.exit('error: evaluation produced no data, '
                     'cannot compute the reduction error')

        real_err = round(fp / total, 4)
        print('real error:', real_err)

        estim_err = -1
        if max_err != -1:
            estim_err = round(max_err / total, 4)
            print('estimated error of freq pruning', estim_err)

        # -1 when nothing was classified as positive; previously the name
        # was left undefined and writing the results raised a NameError.
        precis = -1
        if tp + fp > 0:
            precis = round(tp / (fp + tp), 4)
            print('precision:', precis)

        with open(results_file, 'w') as fptr:
            fptr.write("#real_error, precision, estimated_error\n")
            fptr.write(f"{real_err},{precis},{estim_err}\n")

        print(results_file, "saved.")
Esempio n. 18
0
import json

from nfa import Nfa
from dfa import Dfa

if __name__ == "__main__":
    # DFA demo: minimize the automaton and read a word with both the
    # minimized and the original automaton.
    with open("json/dfa.json", "r") as dfa_file:
        dfa_spec = json.load(dfa_file)
    dfa = Dfa(dfa_spec)
    minimized = dfa.minimize()
    print(minimized.minimize())
    print(minimized.read("aab"))
    print(dfa.read("aab"))

    # NFA demo: determinize the automaton, read a word with the original
    # and print the dot form of the determinized automaton.
    with open("json/nfa.json", "r") as nfa_file:
        nfa_spec = json.load(nfa_file)
    nfa = Nfa(nfa_spec)
    determinized = nfa.determine()
    print(determinized)
    print(nfa.read("ab"))
    dot_output = determinized.dot_dictionary("hola")
    print(dot_output)