Example 1
def get_phf(ruleset):
    """
        Generate number of states, transitions and consumed memory for        \
        Perfect hashing DFA.
    """
    # Create parser - use default parser
    po = parser.parser()
    # Parse input file
    po.load_file(ruleset)
    # create phf_dfa automaton
    aut = PHF_DFA()
    # Make automaton from the REs in the input file
    aut.create_by_parser(po)
    # redefine default PHF class
    a = bdz()
    a.set_ratio(2.0)
    aut.set_PHF_class(a)
    # compute dfa and PHF table
    aut.compute()
    # Return experimental results
    return [
        "Perfect Hashing DFA",
        aut.get_state_num(),
        aut.get_trans_num(),
        aut.report_memory_real(),
        aut.report_memory_real()
    ]
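A minimal, hedged usage sketch for the helper above (the driver and the ruleset path are assumptions, not part of the original example):

# Hypothetical driver; assumes get_phf() and its imports are in scope.
if __name__ == '__main__':
    row = get_phf("../../rules/L7/selected.pcre")  # path is an assumption
    # row = [approach name, states, transitions, memory, memory]
    print("%s: states=%s, transitions=%s, memory=%s" % tuple(row[:4]))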
Example 2
def get_hfa(ruleset):
    """
        Generate number of states, transitions and consumed memory for        \
        History FA.
    """
    # Create parser - use default parser
    po = parser.parser()
    # Parse input file
    po.load_file(ruleset)
    # Create History FA object
    history = HistoryFA()
    # Make automaton from the REs in the input file
    history.create_by_parser(po)
    # Remove epsilons
    history.remove_epsilons()
    # Store the NFA
    NFA = history.get_automaton(True)
    # Determinise the automaton
    history.determinise(create_table=True)
    # Create History FA
    history.compute(NFA)
    # Return experimental results
    return [
        "History FA",
        history.get_state_num(),
        history.get_trans_num(),
        history.report_memory_optimal(),
        history.report_memory_naive()
    ]
Example 3
def get_hybfa(ruleset):
    """
        Generate number of states, transitions and consumed memory for        \
        History FA.
    """
    # Create parser - use default parser
    po = parser.parser()
    # Parse input file
    po.load_file(ruleset)
    # Create Hybrid FA object
    hyb_fa = hybrid_fa()
    # Make automaton from the REs in the input file
    hyb_fa.create_by_parser(po)
    # set parameters for _is_special() function
    hyb_fa.set_max_head_size(-1)  # off
    hyb_fa.set_max_tx(-1)  # off
    hyb_fa.set_special_min_depth(2)
    # Create Hybrid FA
    hyb_fa.compute()
    # Return experimental results
    return [
        "Hybrid FA",
        hyb_fa.get_state_num(),
        hyb_fa.get_trans_num(),
        hyb_fa.report_memory_optimal(),
        hyb_fa.report_memory_naive()
    ]
Example 4
def get_ddfa(ruleset):
    """
        Generate number of states, transitions and consumed memory for DDFA.
    """
    # Create parser - use default parser
    po = parser.parser()
    # Parse input file
    po.load_file(ruleset)
    # Create Delay DFA object
    DelayDfa = DELAY_DFA()
    # Make automaton from the REs in the input file
    DelayDfa.create_by_parser(po)
    # Make Delay DFA
    # Resolve alphabet
    DelayDfa.resolve_alphabet()
    # Create Delay DFA
    DelayDfa.compute()
    # Return experimental results
    return [
        "Delay DFA",
        DelayDfa.get_state_num(),
        DelayDfa.get_trans_num(),
        DelayDfa.report_memory_optimal(),
        DelayDfa.report_memory_naive()
    ]
Example 5
def get_hcfa(ruleset):
    """
        Generate number of states, transitions and consumed memory for History\
        counting FA.
    """
    # Create parser - use the PCRE parser
    po = parser.parser("pcre_parser", True)
    # Parse input file
    po.load_file(ruleset)
    # Create History Counting FA object
    history_counting = HistoryCountingFA()
    # Make automaton from the REs in the input file
    history_counting.create_by_parser(po)
    # Remove epsilons
    history_counting.remove_epsilons()
    # Store the NFA
    NFA = history_counting.get_automaton(True)
    # Replace X{m,n} with X*
    NFA_without_cnt = \
    history_counting._replace_length_restriction_with_a_closure(NFA)
    NFA = history_counting.get_automaton(True)
    # Assign the automaton with X{m,n} replaced by X*
    history_counting._automaton = NFA_without_cnt
    # Determinise the automaton
    history_counting.determinise(create_table=True)
    # Create History Counting FA
    history_counting.compute(NFA)
    # Return experimental results
    return ["History Counting FA", history_counting.get_state_num(), history_counting.get_trans_num(), history_counting.report_memory_optimal(), history_counting.report_memory_naive()]
Example 6
def generate_text(rule):
    """
        From given regular expression (rule) generate corresponding string.
    """
    aut = b_nfa()
    par = parser("pcre_parser")
    par.set_text(rule)
    if not aut.create_by_parser(par):
        return ""
    aut.remove_epsilons()
    aut.search("a")
    state = 0
    string = ""
    while not state in aut._automaton.final:
        trans = aut._mapper[state]
        rnd1 = random.randint(0, len(trans) - 1)
        sym =  aut._automaton.alphabet[list(trans)[rnd1][0]]
        if sym.get_type() == b_symbol.io_mapper["b_Sym_char_class"]:
            chars = sym.charClass
        else:
            chars = sym.char
        state = list(trans)[rnd1][1]
        rnd2 = random.randint(0, len(chars) - 1)
        string += list(chars)[rnd2]
    if 1 not in aut.search(string):
        print "FAIL"
    return string
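A hedged usage sketch for generate_text() (the rule is illustrative; the imports used by the function are assumed to be in scope):

# Generate a random string matched by a simple PCRE rule.
rule = "/ab[cd]ef/"
text = generate_text(rule)
print(repr(text))  # e.g. 'abcef' or 'abdef', chosen at random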
Example 7
def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:S:",
            ["help", "output=", "display=", "simple", "input=", "style="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        if o == "-s":
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        elif o in ("-S", "--style"):
            fsm_style = a.lower()
        else:
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print "Input file with PCRE patterns and output file for FSM must be specified!"
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    if fsm_style == "nfa":
        fsm.remove_epsilons()
    elif fsm_style == "pa":
        fsm.thompson2glushkov()
    elif fsm_style == "dpa":
        fsm.thompson2reverse_glushkov()
    else:
        print "Unsupported automaton type: " + fsm_style
        print "Supported automata styles are: nfa, pa, dpa"
        print "See " + sys.argv[0] + " -h for details"
        sys.exit(1)

    if simple:
        fsm.remove_char_classes()
    fsm.save_to_timbuk(output_file, simple)
    if display_file is not None:
        fsm.show(display_file, " ")
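A hedged sketch of driving main() programmatically (the file names are hypothetical; main() and usage() are assumed to be defined as above):

# Convert a PCRE ruleset to a Timbuk FSM using the position-automaton
# style ("pa" maps to thompson2glushkov() above).
import sys
sys.argv = [sys.argv[0], "-i", "rules.pcre", "-o", "fsm.timbuk", "-S", "pa"]
main()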
Example 8
    def test_report_memory_naive(self):
        """report_memory_naive()"""

        # /ab[^1234]*cd|efg/; test with an expression where one alternative
        # contains [^1234]* and the other does not
        par = parser("pcre_parser")
        par.set_text("/ab[^1234]*cd|efg/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        self.assertTrue(history.report_memory_naive() == 187)
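A minimal harness sketch, assuming the test method lives in a unittest.TestCase subclass (the class name below is hypothetical):

import unittest

class TestHistoryFA(unittest.TestCase):  # hypothetical enclosing class
    pass  # test_report_memory_naive() above would live here

if __name__ == '__main__':
    unittest.main()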
Example 9
def get_sb(ruleset):
    # Create sourdis_bispo_nfa object
    cn = sourdis_bispo_nfa.sourdis_bispo_nfa()
    # Preprocess the REs
    preprocessed = cn.find_pcre_repetitions(ruleset)
    # Create parser - use the PCRE parser
    Test0 = parser.parser("pcre_parser")
    # Load REs
    Test0.set_text(preprocessed)
    # Parse RE and create NFA
    cn.create_by_parser(Test0)
    # Call the compute method
    cn.compute()
    # Get number of used LUTs and FFs
    data = cn.report_logic()
    return ["Sourdis Bispo", data[0], data[1]]
Example 10
def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ho:d:si:",
            ["help", "output=", "display=", "simple", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)
        usage()
        sys.exit(1)
    output_file = None
    input_file = None
    display_file = None
    simple = False
    fsm_style = "nfa"
    for o, a in opts:
        if o == "-s":
            simple = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-o", "--output"):
            output_file = a
        elif o in ("-i", "--input"):
            input_file = a
        elif o in ("-d", "--display"):
            display_file = a
        else:
            assert False, "unhandled option " + o

    if input_file is None or output_file is None:
        print "Input file with PCRE patterns and output file for FSM must be specified!"
        sys.exit(1)

    fsm = b_automaton.b_Automaton()
    p = parser.parser("pcre_parser")
    p.load_file(input_file)
    fsm.create_by_parser(p)

    fsm.remove_epsilons()

    print "State num: ", fsm.get_state_num()
    if simple:
        fsm.remove_char_classes()
    fsm.save_to_FA_format(output_file)
    if display_file is not None:
        fsm.show(display_file, " ")
Example 11
def get_sp_s(ruleset, stride):
    """
        Generate number of LUTs and FFs for strided Sidhu and Prasana's approach.
    """
    # Create sindhu_prasana_nfa object
    cn = sindhu_prasana_nfa.sindhu_prasana_nfa(stride)
    # Create parser - use default parser
    Test0 = parser.parser()
    # Load RE file
    Test0.load_file(ruleset)
    # Parse RE and create NFA
    cn.create_by_parser(Test0)
    # Call the compute method
    cn.compute()
    # Get number of used LUTs and FFs
    data = cn.report_logic()
    return ["Sidhu Prasana with stride ", stride, data[0], data[1]]
Example 12
def get_hybfas(ruleset):
    """
        Generate number of states, transitions and consumed memory for        \
        History FA (Suchodol).
    """
    # Create parser - use default parser
    po = parser.parser()
    # Create Hybrid FA object
    hyb_fa = JHybridFA()
    # Set parser
    hyb_fa.set_parser(po)
    # Parse input file
    hyb_fa.load_file(ruleset)
    # Create Hybrid FA
    hyb_fa.compute()
    # Return experimental results
    return ["Hybrid FA (Suchodol)", hyb_fa.get_state_num(), hyb_fa.get_trans_num(), hyb_fa.report_memory_optimal(), hyb_fa.report_memory_naive()]
Example 13
def get_clark_s(ruleset, stride):
    """
        Generate number of LUTs and FFs for strided Clark's approach with char 
        classes.
    """
    # Create clark_nfa object
    cn = clark_nfa.clark_nfa(False, stride)
    # Create parser - use the PCRE parser
    Test0 = parser.parser("pcre_parser")
    # Load RE file
    Test0.load_file(ruleset)
    # Parse RE and create NFA
    cn.create_by_parser(Test0)
    # Call the compute method
    cn.compute()
    # Get number of used LUTs and FFs
    data = cn.report_logic()
    return ["Clark with stride ", stride, data[0], data[1]]
Example 14
    def test_strided(self):
        """
            Tests the algorithm with stride = 2.
        """
        # Create sindhu_prasana_nfa object
        cn = sindhu_prasana_nfa.sindhu_prasana_nfa(2)
        # Create parser - use the PCRE parser
        Test0 = parser.parser("pcre_parser")
        # Load RE file
        Test0.load_file(aux_func.getPatternMatchDir() +
                        "/algorithms/sindhu_prasana_nfa/test/tests/test.pcre")
        # Parse RE and create NFA
        cn.create_by_parser(Test0)
        # Call the compute method
        cn.compute()
        # Create the simulation
        create_simulation(cn)
        # Run the simulation
        run_simulation(self)
Example 15
    def test_strided(self):
        """
            Tests the algorithm with stride = 2.
        """
        # Get test directory
        tdir = aux_func.getPatternMatchDir() + "/algorithms/clark_nfa/test/"
        # Create clark_nfa object
        cn = clark_nfa.clark_nfa(False, 2)
        # Create parser - use the PCRE parser
        Test0 = parser.parser("pcre_parser")
        # Load RE file
        Test0.load_file(tdir + "tests/test.pcre")
        # Parse RE and create NFA
        cn.create_by_parser(Test0)
        # Call the compute method
        cn.compute()
        # Create the simulation
        create_simulation(cn)
        # Run the simulation
        run_simulation(self)
Example 16
    def test_sb(self):
        """
            Tests the Sourdis-Bispo algorithm.
        """
        # Create sourdis_bispo_nfa object
        cn = sourdis_bispo_nfa.sourdis_bispo_nfa()
        # Preprocess the REs
        preprocessed = cn.find_pcre_repetitions(
            aux_func.getPatternMatchDir() +
            "/algorithms/sourdis_bispo_nfa/test/tests/test.pcre")
        # Create parser - use the PCRE parser
        Test0 = parser.parser("pcre_parser")
        # Load REs
        Test0.set_text(preprocessed)
        # Parse RE and create NFA
        cn.create_by_parser(Test0)
        # Call the compute method
        cn.compute()
        # Create the simulation
        create_simulation(cn)
        # Run the simulation
        run_simulation(self)
Example 17
"""

if __name__ == '__main__':
    # Clark mapping method without support for char classes
    print("-------------------------------------------------------------------")
    print("                     Example of use: Clark NFA                     ")
    print("-------------------------------------------------------------------")
    print(" Ruleset: ../../rules/L7/selected.pcre                             ")
    print(" Character Classes: No                                             ")
    print(" Strided: No                                                       ")
    print(" Genereated VHDL output: clark_nfa_impl_wochc.vhd                  ")
    print("-------------------------------------------------------------------")
    # Create clark_nfa object
    cn = clark_nfa.clark_nfa(True)
    # Create parser - use the PCRE parser
    Test0 = parser.parser("pcre_parser")
    # Load RE file
    Test0.load_file("../../rules/L7/selected.pcre")
    # Parse RE and create NFA
    cn.create_by_parser(Test0)
    # Call the compute method
    cn.compute()
    # Get number of used LUTs and FFs
    data = cn.report_logic()
    print(" Used LUTs estimation: " + str(data[0]))
    print(" Used FFs estimation: " + str(data[1]))
    print("-------------------------------------------------------------------")
    # Save implementation
    # Open file
    f = open("clark_nfa_impl_wochc.vhd", "w")
    # Get VHDL code and write the code
Example 18
def test():
    """
        Run searching using pcregrep and PHF_DFA. Prints out the results.
    """
    # parse options
    usage = "usage: %prog rules.pcre pcap_dir/ [options]"
    optparser = OptionParser(usage=usage)
    optparser.add_option("-O", "--outputfile", dest="resultfile", help="output file for results, default is stdout")
    optparser.add_option("-P", "--PerPacket", dest="PerPacket", action="store_true", default=False,
                      help="compare nonfaulty matching for flows and packets, faulty algorithm is used only with flows")
    optparser.add_option("-s", "--showprogress", dest="progress", action="store_true", default=False,
                      help="show progress of computation")
    optparser.add_option("-C", "--count", dest="maxiter", type="int", default="1", help="number of test iterations")
    optparser.add_option("-F", "--faulty", dest="FAULTY", type="int", default="0", help="number of bits for compress hash, default is 0 (no faulty transitions)")
    optparser.add_option("-D", "--debuglevel", dest="DEBUG", type="int", default="0", help="debug output level (0-2)")
    optparser.add_option("-S", "--savefile", dest="savefile", default="", metavar="FILE", help="save nfa_data in FILE")
    optparser.add_option("-L", "--loadfile", dest="autfile", default="", metavar="FILE", help="load nfa_data from FILE")
    optparser.add_option("-N", "--nonfaulty", dest="NonFaulty", action="store_true", default=False,
                      help="try to generate PHF table without collisions, therefore ensure nonfaulty matching. Experimental code. "
                            "May take a long time with small compress hash output.")
    (options, args) = optparser.parse_args()

    global FAULTY, DEBUG
    if len(args) != 2:
        print "You must specify rules.pcre and pcap_dir/"
        optparser.print_usage()
        exit(1)
    rulesfile, inputdir = args
    PerPacket, resultfile, maxiter, autfile, savefile, FAULTY, DEBUG = \
        (options.PerPacket, options.resultfile, options.maxiter,
         options.autfile, options.savefile, options.FAULTY, options.DEBUG)
    progress = options.progress
    NonFaulty = options.NonFaulty
    
    if inputdir[-1] == "/":
        inputdir = inputdir[:-1] # remove '/' from the end
    rules = open(rulesfile, 'rb')
    if PerPacket:
        packetdir = inputdir + "/packets"
        inputdir = inputdir + "/flows"
    if resultfile:
        sys.stdout = open(resultfile, 'a')
    totalhits, totalfp, totalfn = (0, 0, 0)
    iter = 0    
    while iter != maxiter:
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "pcregrep",
        if not iter:
            # prepare pcregrep
            p = subprocess.Popen("cd pcre-8.20/ && make pcregrep", shell=True, stdout=subprocess.PIPE)
            p.wait()
            results = dict()
        file_list = list()
        rule_count = len(open(rulesfile).readlines())
        for root, dirs, files in os.walk(inputdir):
            for i in files:
                i = os.path.join(root, i)
                file_list.append(i)
                if not iter:
                    results[i] = [rule_count*[0],rule_count*[0],rule_count*[0]]
                else:
                    results[i][0] = rule_count*[0]
        #results = init_results
        rule_num = 0
        grep_reg_exp = "grep_reg_exp." + str(os.getpid())
        for rule in rules:
            if not iter:
                if DEBUG:
                    print rule,
                (grep_rule, grep_params) = parse_rule(rule)
                f = open(grep_reg_exp, 'w')
                f.write(grep_rule)
                f.close()
                p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + inputdir, shell=True, stdout=subprocess.PIPE)
                p.wait()
                for out in p.stdout:
                    item = out.split()[0]
                    results[item][1][rule_num] = 1
                if PerPacket:
                    p = subprocess.Popen("pcre-8.20/pcregrep --buffer-size 50000 --color=auto -N ANYCRLF" + grep_params + " -r -l -f " + grep_reg_exp + " " + packetdir, shell=True, stdout=subprocess.PIPE)
                    p.wait()
                    for out in p.stdout:
                        item = inputdir + "/" + out.split()[0].split("-")[1].replace("_", "/")
                        results[item][2][rule_num] = 1

            rule_num += 1
        try:
            os.remove(grep_reg_exp)
        except OSError:
            pass
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "create automaton",
        #aut = b_Automaton()
        aut = PHF_DFA()
        if autfile:
            aut.create_from_nfa_data(nfa_data().load_from_file(autfile))
        else:
            par = parser("pcre_parser")
            #par.set_text(rule)
            par.load_file(rulesfile)
            aut.create_by_parser(par)
            if DEBUG:
                aut.show("NFA.dot")
            #aut.remove_epsilons()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "resolve alphabet",
            aut.resolve_alphabet()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "determinise",
            aut.determinise()
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "minimise",
            aut.minimise()
            if DEBUG:
                aut.show("DFA.dot")
            if savefile:
                aut._automaton.save_to_file(savefile)
        aut._automaton1 = aut._automaton
        aut.set_table_parameters((20,10))
        if DEBUG > 1:
            print "Without fallback state:"
            print "Symbols:", len(aut._automaton.alphabet)
            print "States:", len(aut._automaton.states)
            print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        if isinstance(aut, PHF_DFA):
            if progress:
                print >>sys.stderr, "\r", ' '*80, '\r', "generate PHF",
            if aut.get_trans_num() == (aut.get_state_num() * aut.get_alpha_num()):
                aut.enable_fallback_state(warning=False)
            if FAULTY:
                aut.enable_faulty_transitions(FAULTY)
                if NonFaulty:
                    aut.enable_faulty_check()
            aut.compute()
            if DEBUG:
                print "Fallback state:", aut.fallback_state
                print "Symbols:", len(aut._automaton.alphabet)
                print "States:", len(aut._automaton.states)
                print "Transitions:", aut.get_trans_num(), float(aut.get_trans_num()) / (aut.get_state_num() * aut.get_alpha_num()) * 100, "%"
        count = 1
        all = len(file_list)
        if progress:
            print >> sys.stderr, '\r' + 80*' ' + '\r',
        for f in file_list:
            # progress
            if progress:
                print >> sys.stderr, '\r',
                print >> sys.stderr, str(iter+1)+'/'+str(maxiter)+ ":", count, '/', all,
#                sys.stderr.flush()
            count += 1
            data = open(f, 'rb').read()
            results[f][0] = aut.search(data)
        if progress:
            print >>sys.stderr, "\r", ' '*80, '\r', "compare results",
        if isinstance(aut, PHF_DFA) and DEBUG:
            if DEBUG > 1:
                print "List of collisions:"
                print aut.collisions
                for tran, i in aut.collisions.iteritems():
                    #print tran, i
                    print BitArray(bytes=tran[0], length=aut.symbol_bits).uint, BitArray(bytes=tran[1], length=aut.state_bits).uint, i
                    print "SYM:", aut._automaton.alphabet[BitArray(bytes=tran[0], length=aut.symbol_bits).uint]
            print "Bad transitions:", aut.bad_transitions
            print "Collisions:", len(aut.collisions)
            print "Compress bits:", aut.compress_bits
        stats = compare_results(results)
        stats = list(stats)
        if stats[0] == 0:
            print "Zero hits, cannot compute F-measure!"
            stats[0] = 1
        if DEBUG:
            print "Total number of searched packets/flows:", stats[3]
        print "Hits:", stats[0]
        totalhits += stats[0]
        totalfp += stats[1]
        totalfn += stats[2]
        precis = float(stats[0])/(stats[1]+stats[0])
        recall = float(stats[0])/(stats[0]+stats[2])
        fmeas = 2* precis * recall / (precis + recall)
        print "False positives:", stats[1], precis*100, "%"
        print "False negatives:", stats[2], recall*100, "%"
        print "F-measure:", fmeas*100, "%"
        if PerPacket:
            print "Per packet errors:", stats[4], stats[5]
        print '-'*80
        iter += 1
    print "Total stats:"
    precis = float(totalhits)/(totalfp + totalhits)
    recall = float(totalhits)/(totalfn + totalhits)
    fmeas = 2* precis * recall / (precis + recall)
    print "Hits:", totalhits
    print "False positives:", totalfp, precis*100, "%"
    print "False negatives:", totalfn, recall*100, "%"
    print "F-measure:", fmeas*100, "%"
    print "_"*80
Example 19
    def _make_his_fa(self, file_name):
        """
            Function to build a History-based FA from the REs in file_name.

            :param file_name: Name of input file
            :type file_name: string
        """

        # Discover the "before" part according to .*-like or .{m}-like
        # patterns, and rewrite .{m}-like patterns to .*-like ones.
        b = []  # keys for closure states
        cl = []  # closure NFA states
        cn = []  # counters for counting constraint
        cn_i = []  # counters index
        fr = open(file_name, "r")
        tmp = tempfile.NamedTemporaryFile(delete=False)
        for line in fr.readlines():
            # remove '\n' from end of line
            line = line.rsplit('\n', 1)[0]
            pattern = re.compile(
                r"""
            (?<=[^/])[.](?=[*])   # for like .*
            (?![^[]*?\])          # because of .* not in [.*]
            |
            \[\^
            .*?                   # for like [^abc]*
            (?<!\\)\](?=[*])
            (?![^[]*?\])
            |
            [.](?=[{])            # for like .{15}
            (?![^[]*?\])
            |
            .(?=[{])              # for like a{15}
            (?![^[]*?\])
            |
            \[\^?                 # for like [abc]{15} or [^abc]{15}
            .*?
            (?<!\\)\](?=[{])
            (?![^[]*?\])
            """, re.X)
            # split line to before (split[0]) and after (split[1]) part
            split = re.split(pattern, line, maxsplit=1)
            # remove .* from the beginning of the pattern
            if split[0].find(".*") != -1:
                split[0] = '/' + split[0][split[0].find(".*") + 2:]
            # line contains a .*-like or .{m}-like pattern
            if len(split) == 2:
                b.append(split[0][1:])
                cl.append([])
                if split[1][0] == '{':
                    cn.append(int(split[1][1:split[1].find('}', 1)]))
                    cn_i.append(len(cn) - 1)
                    # replace the .{m}-like part with a .*-like one
                    line = line[:line.find(split[1])] + '*' + \
                    split[1][split[1].find('}') + 1:]
                else:
                    cn.append(-1)
            # append line to tmp file
            tmp.write(line + '\n')
        fr.close()
        tmp.close()
        # Make DFA.
        # Parse input file
        par = parser("pcre_parser")
        par.load_file(tmp.name)
        # Make automaton from the REs in the input file
        self.create_by_parser(par)
        # Make Deterministic FA
        self.determinise(True)
        # remove temporary file
        os.unlink(tmp.name)
        # Adjust the _state_representation list.
        m = list(min(self._state_representation[1:]))
        r = self._state_representation
        for i in range(0, len(r)):
            for x in m:
                if x in r[i]:
                    r[i].remove(x)
            r[i] = list(r[i])
    # Discover closure state.
        a = self._automaton
        cur_state = a.start
        # sort transitions
        sort = {}  # sorted transitions
        for s in range(0, len(a.states)):
            sort[s] = []
        for t in a.transitions:
            sort[t[0]].append(t[1:])
        for i in range(0, len(b)):
            for c in b[i]:
                for t in sort[cur_state]:
                    prev_state = cur_state
                    # single character
                    if isinstance(a.alphabet[t[0]], sym_char.b_Sym_char):
                        if a.alphabet[t[0]].char == c:
                            cur_state = t[-1]
                            # skip other transitions
                            break
                    # character range
                    else:
                        if c in a.alphabet[t[0]].charClass:
                            cur_state = t[-1]
                            # skip other transitions
                            break
            # remove closure transition
            a.transitions.remove((prev_state, t[0], t[1]))
            cl[i] = r[cur_state][-1] + 1
            # append closure history transition
            if cn[i] == -1:
                a.transitions.add((prev_state, t[0], t[1], '|', -2, i))
            else:
                # counting constraint
                a.transitions.add(
                    (prev_state, t[0], t[1], '|', -2, i, cn[i], cn_i.index(i)))
            cur_state = a.start
    # Discover fading states and their overlap states.
        f = []  # fading states (DFA)
        f_d = {}  # fading states in dictionary, key is string before
        o = []  # overlap states according fading states (DFA)
        for i in range(0, len(b)):
            f_d[b[i]] = []
            c_s = cl[i]  # c_s is closure states (NFA)
            for j in range(0, len(r)):
                if c_s in r[j]:
                    over = list(r[j])
                    for s in cl:
                        if s in over:
                            over.remove(s)
                    # append only states which have an overlap state
                    if over in r:
                        if j not in f:
                            f.append(j)
                            f_d[b[i]].append(j)
                            # append overlap DFA state
                            o.append(r.index(over))
    # Remove fading states.
        for s in f:
            del a.states[s]
        # remove the deleted states from the final states
        for s in list(a.final):
            if s in f:
                a.final.remove(s)
        # change numbering for states
        a_s = sorted(a.states.keys())  # aux. states
        tmp = {}
        for k in a_s:
            tmp[a_s.index(k)] = a.states[k]
        a.states = tmp
        # change numbering for final states
        tmp = set()
        for s in a.final:
            tmp.add(a_s.index(s))
        a.final = tmp
        # change numbering for start state
        a.start = a_s.index(a.start)
        # Change fading transitions.
        # A transition is a tuple of 3 numbers, optionally followed by
        # history properties, e.g.: (S, A, D)
        # S is the source state
        # A is the alphabet index (transition symbol)
        # D is the destination state
        # e.g.: (S, A, D, -3, f_i, '|', -4, f_i)
        # where the flags before '|' are conditions required to take the
        # transition, the flags after '|' are effects applied once the
        # transition finishes, and f_i is the flag index
        # flags: -2 = set, -3 = must be set, -4 = reset, -5 = must be reset
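        # Illustrative (assumed) concrete instance of the scheme above:
        #   (4, 1, 7, -3, 0, '|', -4, 0)
        # goes from state 4 to state 7 on symbol 1, may be taken only while
        # flag 0 is set (-3, 0), and resets flag 0 once taken (-4, 0).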
        #c_s_D = []  # closure state DFA !
        #print "cl:",cl
        #print "r:",r
        #for c_s_N in cl:
        #print "c_s_N:",c_s_N
        #c_s_D.append(r.index([c_s_N]))
        aux = set()
        d = {}  # transitions for pruning process
        tmp = []  # helpful list for pruning process
        tmp_t = []  # helpful list for pruning process
        for t in a.transitions:
            t = list(t)
            # destination state is fading state
            if t[2] in f:
                # source state is fading state
                if t[0] in f:
                    # this transition only if is set flag (=)
                    # append must be set flag
                    for i in range(0, len(b)):
                        if t[0] in f_d[b[i]]:
                            t.append(-3)
                            t.append(i)
                            if cn[i] != -1:
                                # append counting constraint counter 0
                                t.append(0)
                                t.append(cn_i.index(i))
                    t.append('|')
                    # change to overlap state
                    t[0] = o[f.index(t[0])]
                # source state is non fading
                else:
                    if '|' not in t:
                        t.append('|')
                    # append set flag
                    for i in range(0, len(b)):
                        if t[2] in f_d[b[i]]:
                            t.append(-2)
                            t.append(i)
                            if cn[i] != -1:
                                # append counting constraint set counter
                                t.append(cn[i])
                                t.append(cn_i.index(i))
                # change des. state to overlap state
                t[2] = o[f.index(t[2])]
            # source state is fading state
            elif t[0] in f:
                # append must be set flag
                for i in range(0, len(b)):
                    if t[0] in f_d[b[i]]:
                        t.append(-3)
                        t.append(i)
                        if cn[i] != -1:
                            # append counting constraint counter 0
                            t.append(0)
                            t.append(cn_i.index(i))
                t.append('|')
                # destination state is non-fading state
                # reset flag
                for i in range(0, len(b)):
                    if t[0] in f_d[b[i]]:
                        t.append(-4)
                        t.append(i)
                        if cn[i] != -1:
                            # append counting constraint reset counter
                            t.append(0)
                            t.append(cn_i.index(i))
                # change to overlap state
                t[0] = o[f.index(t[0])]
            # change numbering
            t[0] = a_s.index(t[0])
            t[2] = a_s.index(t[2])
            aux.add(tuple(t))
            # for pruning process
            if len(t) == 3:
                t_a = t + ['|']
            else:
                t_a = t[0:3] + t[t.index('|'):]
            t_a = tuple(t_a)
            if t_a in tmp:
                # group similar transitions
                if t_a not in d:
                    d[t_a] = [tmp_t[tmp.index(t_a)]]
                if t not in d[t_a]:
                    d[t_a].append(t)
            else:
                tmp.append(t_a)
                tmp_t.append(t)
        a.transitions = aux
        # Pruning process (remove similar transitions).
        for key in d.keys():
            # exist transition without condition
            if list(key[0:3]) in d[key]:
                # keep only this transition
                d[key].remove(list(key[0:3]))
                for t in d[key]:
                    a.transitions.remove(tuple(t))
            else:
                # find a minimal condition set and remove transitions whose
                # conditions are supersets of it
                c = []  # auxiliary, conditions for tran.
                c_l = []  # auxiliary, conditions length
                tran = []  # auxiliary, original tran.
                for t in d[key]:
                    aux = []
                    for i in range(3, t.index('|'), 2):
                        aux += [tuple(t[i:i + 2])]
                    c.append(aux)
                    c_l.append(len(aux))
                    tran.append(tuple(t))
                while len(c) != 0:
                    s = c[c_l.index(min(c_l))]  # set
                    for t_c in list(c):
                        flag = 1
                        for x in s:
                            if x not in t_c:
                                flag = 0
                                break
                        # tran. falls into this set
                        if flag and s != t_c:
                            # remove subset tran.
                            del c_l[c.index(t_c)]
                            a.transitions.remove(tran[c.index(t_c)])
                            c.remove(t_c)
                    # remove set (only from c)
                    del c_l[c.index(s)]
                    c.remove(s)
    # Append must be reset flag.
        c = {}  # tran. which have must be set flag
        c_i = {}  # indexes of flags
        n = {}  # tran. which have not must be set flag
        for s in range(0, len(a.states)):
            c[s] = []
            c_i[s] = []
            n[s] = []
        for t in a.transitions:
            added = False
            t = list(t)
            # must be set tran.
            if len(t) > 3 and (t.index('|') - 3) > 0:
                tmp = []
                for i in range(3, t.index('|'), 2):
                    if t[i] == -3:
                        tmp.append(t[i + 1])
                if tmp != []:
                    if tuple(t) not in c[t[0]]:
                        c[t[0]].append(tuple(t))
                        c_i[t[0]].append(tmp)
                    added = True
            # not must be set tran.
            if not added:
                if tuple(t) not in n[t[0]]:
                    n[t[0]].append(tuple(t))
        for s in range(0, len(a.states)):
            for t in c[s]:
                # discover tran. char
                if isinstance(a.alphabet[t[1]], sym_char.b_Sym_char):
                    chars = list(a.alphabet[t[1]].char)
                else:
                    chars = list(a.alphabet[t[1]].charClass)
                for t_n in list(n[s]):
                    if isinstance(a.alphabet[t_n[1]], sym_char.b_Sym_char):
                        chars_n = list(a.alphabet[t_n[1]].char)
                    else:
                        chars_n = list(a.alphabet[t_n[1]].charClass)
                    for char in chars_n:
                        if char in chars:
                            # remove bad tran.
                            a.transitions.remove(t_n)
                            n[s].remove(t_n)
                            # append must be reset flag
                            t_n = list(t_n)
                            if '|' not in t_n:
                                for i in c_i[s][c[s].index(t)]:
                                    t_n.append(-5)
                                    t_n.append(i)
                                t_n.append('|')
                            else:
                                for i in c_i[s][c[s].index(t)]:
                                    t_n.insert(3, -5)
                                    t_n.insert(4, i)
                            a.transitions.add(tuple(t_n))
                            break

    # Count of flags and counters.
        self.flag_c = len(b)
        self.ctr_c = len(cn_i)
        self.cn_i = cn_i
        # Add automaton flag.
        a.Flags["History FA"] = True
Example 20
    def test_search(self):
        """search()"""
        # Test this method with the REs below.

        # /abcd/; test with an expression that does not use properties
        # of HistoryFA
        par = parser("pcre_parser")
        par.set_text("/abcd/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        self.assertTrue(history.search("efg") == [0])
        self.assertTrue(history.search("efabcg") == [0])
        self.assertTrue(history.search("efabcd") == [1])
        self.assertTrue(history.search("abcefabcdabcx") == [1])
        self.assertTrue(history.search("abcd") == [1])
        self.assertTrue(history.search("123abcd456") == [1])
        self.assertTrue(history.search("123ab0cd456") == [0])
 
        # /ab.*cd/; test with an expression that contains .*
        par = parser("pcre_parser")
        par.set_text("/ab.*cd/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        self.assertTrue(history.search("efg") == [0])
        self.assertTrue(history.search("abnecoefg") == [0])
        self.assertTrue(history.search("abnecocx") == [0])
        self.assertTrue(history.search("necoabnecocdneco") == [1])
        self.assertTrue(history.search("abcd") == [1])
        self.assertTrue(history.search("123abcd456") == [1])
        self.assertTrue(history.search("123ab0cd456") == [1])
 
        # /ab[^1234]*cd|efg/; test with an expression where one alternative
        # contains [^1234]* and the other does not
        par = parser("pcre_parser")
        par.set_text("/ab[^1234]*cd|efg/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        self.assertTrue(history.search("exfg") == [0])
        self.assertTrue(history.search("abnecocx") == [0])

        self.assertTrue(history.search("efg") == [1])
        self.assertTrue(history.search("textefgtext") == [1])
        self.assertTrue(history.search("abnecoefg") == [1])

        self.assertTrue(history.search("necoabnecocdneco") == [1])
        self.assertTrue(history.search("abcd") == [1])
        self.assertTrue(history.search("123abcd456") == [1])
        self.assertTrue(history.search("123ab0cd456") == [1])
 
        self.assertTrue(history.search("ab1cd") == [0])
        self.assertTrue(history.search("ab4cd") == [0])
        self.assertTrue(history.search("abcefg") == [1])
        self.assertTrue(history.search("123abblekekcd456") == [1])
        self.assertTrue(history.search("123ab0cd456") == [1])
Example 21
        "-------------------------------------------------------------------")
    print(
        "                  Example of use: History FA                       ")
    print(
        "-------------------------------------------------------------------")
    print(
        " Ruleset: /ab.*cd/                                                 ")
    print(
        " Graphical representation: automaton.dot                           ")
    print(
        "                           history.dot                             ")
    print(
        "-------------------------------------------------------------------")

    # Create PCRE parser object
    par = parser("pcre_parser")
    # Set the ruleset
    par.set_text("/ab.*cd/")

    # Create HistoryFA object
    history = HistoryFA()
    # Parse the ruleset
    history.create_by_parser(par)
    # Remove epsilons
    history.remove_epsilons()
    # Get copy of NFA part
    NFA = history.get_automaton(True)
    # Determinise the automaton. Record the correspondence between DFA
    # states and NFA states.
    history.determinise(create_table=True)
    # Create the History FA from the deterministic automaton and the
    # original NFA.
    history.compute(NFA)
Esempio n. 33
0
    def _make_his_fa(self, file_name):
        """
            Fuction for make History based FA from RE in FileName.

            :param file_name: Name of input file
            :type file_name: string
        """

    # Discover before part, according .* or .{m} like pattern,
    # and change .{m} to .* like.
        b = []    # keys for closure states
        cl = []   # closure NFA states
        cn = []   # counters for counting constraint
        cn_i = [] # counters index
        fr = open(file_name, "r")
        tmp = tempfile.NamedTemporaryFile(delete=False)
        for line in fr.readlines():
            # remove '\n' from end of line
            line = line.rsplit('\n', 1)[0]
            pattern = re.compile(r"""
            (?<=[^/])[.](?=[*])   # for like .*
            (?![^[]*?\])          # because of .* not in [.*]
            |
            \[\^
            .*?                   # for like [^abc]*
            (?<!\\)\](?=[*])
            (?![^[]*?\])
            |
            [.](?=[{])            # for like .{15}
            (?![^[]*?\])
            |
            .(?=[{])              # for like a{15}
            (?![^[]*?\])
            |
            \[\^?                 # for like [abc]{15} or [^abc]{15}
            .*?
            (?<!\\)\](?=[{])
            (?![^[]*?\])
            """, re.X)
            # split line to before (split[0]) and after (split[1]) part
            split = re.split(pattern, line, maxsplit=1)
            # remove .* from begin of pattern
            if split[0].find(".*") != -1:
                split[0] = '/' + split[0][split[0].find(".*") + 2:]
            # line contain .* or .{m} like pattern
            if len(split) == 2:
                b.append(split[0][1:])
                cl.append([])
                if split[1][0] == '{':
                    cn.append(int(split[1][1:split[1].find('}', 1)]))
                    cn_i.append(len(cn) - 1)
                    # replace .{m} like to .* like
                    line = line[:line.find(split[1])] + '*' + \
                    split[1][split[1].find('}') + 1:]
                else :
                    cn.append(-1)
            # append line to tmp file
            tmp.write(line + '\n')
        fr.close()
        tmp.close()
    # Make DFA.
        # Parse input file
        par = parser("pcre_parser")
        par.load_file(tmp.name)
        # Make automat from RE which was in input file
        self.create_by_parser(par)
        # Make Deterministic FA
        self.determinise(True)
        # remove temporary file
        os.unlink(tmp.name)
    # Adjustment _state_representation list.
        m = list(min(self._state_representation[1:]))
        r = self._state_representation
        for i in range(0, len(r)):
            for x in m:
                if x in r[i]:
                    r[i].remove(x)
            r[i] = list(r[i])
    # Discover closure state.
        a = self._automaton
        cur_state = a.start
        # sort transitions
        sort = {} # sorted transitions
        for s in range(0, len(a.states)):
            sort[s] = []
        for t in a.transitions:
            sort[t[0]].append(t[1:])
        for i in range(0, len(b)):
            for c in b[i]:
                for t in sort[cur_state]:
                    prev_state = cur_state
                    # single character
                    if isinstance(a.alphabet[t[0]], sym_char.b_Sym_char):
                        if a.alphabet[t[0]].char == c:
                            cur_state = t[-1]
                            # skip other transitions
                            break
                    # character range
                    else :
                        if c in a.alphabet[t[0]].charClass:
                            cur_state = t[-1]
                            # skip other transitions
                            break
            # remove closure transition
            a.transitions.remove((prev_state, t[0], t[1]))
            cl[i] = r[cur_state][-1] + 1
            # append closure history transition
            if cn[i] == -1:
                a.transitions.add((prev_state, t[0], t[1], '|',
                -2, i))
            else :
                # counting constraint
                a.transitions.add((prev_state, t[0], t[1], '|',
                -2, i, cn[i], cn_i.index(i)))
            cur_state = a.start
    # Discover fading states and their overlap states.
        f = []    # fading states (DFA)
        f_d = {}  # fading states in a dictionary, keyed by the 'before' string
        o = []    # overlap states corresponding to the fading states (DFA)
        for i in range(0, len(b)):
            f_d[b[i]] = []
            c_s = cl[i]   # c_s is the closure state (NFA)
            for j in range(0, len(r)):
                if c_s in r[j]:
                    over = list(r[j])
                    for s in cl:
                        if s in over:
                            over.remove(s)
                    # append only states which have an overlap state
                    if over in r:
                        if j not in f:
                            f.append(j)
                            f_d[b[i]].append(j)
                            # append overlap DFA state
                            o.append(r.index(over))
    # Remove fading states.
        for s in f:
            del a.states[s]
        # remove deleted states from the set of final states
        for s in list(a.final):
            if s in f:
                a.final.remove(s)
        # change numbering for states
        a_s = sorted(a.states.keys()) # aux. states
        tmp = {}
        for k in a_s:
            tmp[a_s.index(k)] = a.states[k]
        a.states = tmp
        # change numbering for final states
        tmp = set()
        for s in a.final:
            tmp.add(a_s.index(s))
        a.final = tmp
        # change numbering for start state
        a.start = a_s.index(a.start)
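        # e.g. if states {0, 2, 5} survive, a_s == [0, 2, 5] and old state 5
        # is renumbered to new state 2 (illustrative numbers only)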
    # Change fading transitions.
        # A transition is a tuple of 3 numbers, optionally followed by
        # history properties:
        # plain form: (S, A, D)
        # S is the source state
        # A is the alphabet symbol (transition char)
        # D is the destination state
        # history form: (S, A, D, -3, f_i, '|', -2, f_i)
        # the '|' separates the properties required for the transition to
        # execute (before it) from the properties set after the transition
        # finishes (after it)
        # f_i is the flag index
        # flags: -2 = set, -3 = must be set, -4 = reset, -5 = must be reset
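        # e.g. (5, 2, 7, -3, 0, '|', -2, 1) reads: go from state 5 to state
        # 7 on symbol 2, but only if flag 0 is set; afterwards set flag 1
        # (illustrative values only, not taken from a real automaton)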
        aux = set()
        d = {}    # transitions for pruning process
        tmp = []    # helper list for the pruning process
        tmp_t = []  # helper list for the pruning process
        for t in a.transitions:
            t = list(t)
            # destination state is fading state
            if t[2] in f:
                # source state is fading state
                if t[0] in f:
                    # this transition may fire only if the flag is set
                    # append the must-be-set flag
                    for i in range(0, len(b)):
                        if t[0] in f_d[b[i]]:
                            t.append(-3)
                            t.append(i)
                            if cn[i] != -1:
                                # append counting constraint counter 0
                                t.append(0)
                                t.append(cn_i.index(i))
                    t.append('|')
                    # change to overlap state
                    t[0] = o[f.index(t[0])]
                # source state is non-fading
                else:
                    if '|' not in t:
                        t.append('|')
                    # append set flag
                    for i in range(0, len(b)):
                        if t[2] in f_d[b[i]]:
                            t.append(-2)
                            t.append(i)
                            if cn[i] != -1:
                                # append counting constraint set counter
                                t.append(cn[i])
                                t.append(cn_i.index(i))
                # change destination state to overlap state
                t[2] = o[f.index(t[2])]
            # source state is fading state
            elif t[0] in f:
                # append must be set flag
                for i in range(0, len(b)):
                    if t[0] in f_d[b[i]]:
                        t.append(-3)
                        t.append(i)
                        if cn[i] != -1:
                            # append counting constraint counter 0
                            t.append(0)
                            t.append(cn_i.index(i))
                t.append('|')
                # destination state is non-fading state
                # reset flag
                for i in range(0, len(b)):
                    if t[0] in f_d[b[i]]:
                        t.append(-4)
                        t.append(i)
                        if cn[i] != -1:
                            # append counting constraint reset counter
                            t.append(0)
                            t.append(cn_i.index(i))
                # change to overlap state
                t[0] = o[f.index(t[0])]
            # change numbering
            t[0] = a_s.index(t[0])
            t[2] = a_s.index(t[2])
            aux.add(tuple(t))
            # for pruning process
            if len(t) == 3:
                t_a = t + ['|']
            else:
                t_a = t[0:3] + t[t.index('|'):]
            t_a = tuple(t_a)
            if t_a in tmp:
                # group similar transitions
                if t_a not in d:
                    d[t_a] = [tmp_t[tmp.index(t_a)]]
                if t not in d[t_a]:
                    d[t_a].append(t)
            else:
                tmp.append(t_a)
                tmp_t.append(t)
        a.transitions = aux
    # Pruning process (remove redundant similar transitions).
        for key in d.keys():
            # a transition without conditions exists
            if list(key[0:3]) in d[key]:
                # keep only this transition
                d[key].remove(list(key[0:3]))
                for t in d[key]:
                    a.transitions.remove(tuple(t))
            else:
                # find the minimal condition set and remove all its supersets
                c = []    # auxiliary, condition lists per transition
                c_l = []  # auxiliary, condition list lengths
                tran = [] # auxiliary, original transitions
                for t in d[key]:
                    aux = []
                    for i in range(3, t.index('|'), 2):
                        aux += [tuple(t[i:i+2])]
                    c.append(aux)
                    c_l.append(len(aux))
                    tran.append(tuple(t))
                while len(c) != 0:
                    s = c[c_l.index(min(c_l))]  # set
                    for t_c in list(c):
                        flag = 1
                        for x in s:
                            if x not in t_c:
                                flag = 0
                                break
                        # this transition's conditions contain the set s
                        if flag and s != t_c:
                            # remove the subsumed transition
                            del c_l[c.index(t_c)]
                            a.transitions.remove(tran[c.index(t_c)])
                            c.remove(t_c)
                    # remove set (only from c)
                    del c_l[c.index(s)]
                    c.remove(s)
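        # pruning example: condition sets [(-3, 0)] and [(-3, 0), (-3, 1)]
        # over the same (src, sym, dst) and actions -- whenever the second
        # could fire the first can too, so the second transition is removed
        # (illustrative flag indices only)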
    # Append must be reset flag.
        c = {}    # transitions which have a must-be-set flag
        c_i = {}  # indexes of their flags
        n = {}    # transitions without a must-be-set flag
        for s in range(0, len(a.states)):
            c[s] = []
            c_i[s] = []
            n[s] = []
        for t in a.transitions:
            added = False
            t = list(t)
            # transition with a must-be-set flag
            if len(t) > 3 and (t.index('|') - 3) > 0:
                tmp = []
                for i in range(3, t.index('|'), 2):
                    if t[i] == -3:
                        tmp.append(t[i+1])
                if tmp != []:
                    if tuple(t) not in c[t[0]]:
                        c[t[0]].append(tuple(t))
                        c_i[t[0]].append(tmp)
                    added = True
            # transition without a must-be-set flag
            if not added:
                if tuple(t) not in n[t[0]]:
                    n[t[0]].append(tuple(t))
        for s in range(0, len(a.states)):
            for t in c[s]:
                # discover the transition's characters
                if isinstance(a.alphabet[t[1]], sym_char.b_Sym_char):
                    chars = list(a.alphabet[t[1]].char)
                else:
                    chars = list(a.alphabet[t[1]].charClass)
                for t_n in list(n[s]):
                    if isinstance(a.alphabet[t_n[1]], sym_char.b_Sym_char):
                        chars_n = list(a.alphabet[t_n[1]].char)
                    else:
                        chars_n = list(a.alphabet[t_n[1]].charClass)
                    for char in chars_n:
                        if char in chars:
                            # remove the conflicting transition
                            a.transitions.remove(t_n)
                            n[s].remove(t_n)
                            # append the must-be-reset flag
                            t_n = list(t_n)
                            if '|' not in t_n:
                                for i in c_i[s][c[s].index(t)]:
                                    t_n.append(-5)
                                    t_n.append(i)
                                t_n.append('|')
                            else:
                                for i in c_i[s][c[s].index(t)]:
                                    t_n.insert(3, -5)
                                    t_n.insert(4, i)
                            a.transitions.add(tuple(t_n))
                            break
    # Store the number of flags and counters.
        self.flag_c = len(b)
        self.ctr_c = len(cn_i)
        self.cn_i = cn_i
    # Add automaton flag.
        a.Flags["History FA"] = True
Esempio n. 34
0

if __name__ == '__main__':
    # Sidhu Prasana mapping method
    print("-------------------------------------------------------------------")
    print("               Example of use: Sidhu-Prasana NFA                   ")
    print("-------------------------------------------------------------------")
    print(" Ruleset: ../../rules/L7/selected.pcre                             ")
    print(" Strided: No                                                       ")
    print(" Genereated VHDL output: sidhu_prasana_nfa_impl.vhd                ")
    print("-------------------------------------------------------------------")
    # Create sindhu_prasana_nfa object
    cn = sindhu_prasana_nfa.sindhu_prasana_nfa()
    # Create parser - use default parser
    Test0 = parser.parser()
    # Load RE file
    Test0.load_file("../../rules/L7/selected.pcre")
    # Parse RE and create NFA
    cn.create_by_parser(Test0)
    # Call the compute method
    cn.compute()
    # Get number of used LUTs and FFs
    data = cn.report_logic()
    print(" Used LUTs estimation: " + str(data[0]))
    print(" Used FFs estimation: " + str(data[1]))
    print("-------------------------------------------------------------------")
    # Save implementation
    # Open file
    f = open("sidhu_prasana_nfa_impl.vhd", "w")
    # Get VHDL code and write the code
Esempio n. 35
0
    def test_compute(self):
        """compute()"""
        # Check the correctness of the computed automaton via
        # self.assertTrue on individual items, with focus on the
        # HistoryFA-specific properties (transitions, flags, counters)

        # /abcd/; test with an expression that does not use properties
        # of HistoryFA
        par = parser("pcre_parser")
        par.set_text("/abcd/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        copy = history.get_automaton()
        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_fa/test_data/test_data_1.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)

        # /ab.*cd/; test with an expression that contains .*
        par = parser("pcre_parser")
        par.set_text("/ab.*cd/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        copy = history.get_automaton()
        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_fa/test_data/test_data_2.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)

        # /ab[^1234]*cd|efg/; test with an alternation where one branch
        # contains [^1234]* and the other does not
        par = parser("pcre_parser")
        par.set_text("/ab[^1234]*cd|efg/")
        history = HistoryFA()
        history.create_by_parser(par)
        history.remove_epsilons()
        NFA = history.get_automaton(True)
        history.determinise(create_table=True)
        history.compute(NFA)

        copy = history.get_automaton()
        result = nfa_data().load_from_file(aux_func.getPatternMatchDir() + "/algorithms/history_fa/test_data/test_data_3.nfa_data")

        self.assertTrue(sorted(copy.states.keys()) ==
            sorted(result.states.keys()))
        self.assertTrue(copy.alphabet == result.alphabet)
        self.assertTrue(copy.start == result.start)
        self.assertTrue(copy.final == result.final)
        self.assertTrue(copy.transitions == result.transitions)
        self.assertTrue(copy.Flags == result.Flags)
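The three cases above repeat the same comparison block. A helper along these
lines (hypothetical, not part of the Netbench test suite) would remove the
duplication; assertEqual is used so a failure reports both values:

    def assert_same_automaton(self, copy, result, check_flags=True):
        # compare the computed automaton against the stored reference
        self.assertEqual(sorted(copy.states.keys()),
            sorted(result.states.keys()))
        self.assertEqual(copy.alphabet, result.alphabet)
        self.assertEqual(copy.start, result.start)
        self.assertEqual(copy.final, result.final)
        self.assertEqual(copy.transitions, result.transitions)
        if check_flags:
            self.assertEqual(copy.Flags, result.Flags)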
Esempio n. 36
0
    for ruleset in rulesets:
        for stride in strides:

            ruleset_filename = rulesets_prefix + ruleset + rulesets_suffix
            # remove any non-alphanumeric character
            ruleset_abc = re.sub(r'[\W_]', '', ruleset)

            print('Generating (%s,%d)...' % (ruleset_filename, stride))

            start = time.time()

            # Create clark_nfa object
            cn = clark_nfa.clark_nfa(True, stride)
            # Create parser - use default parser
            Test0 = parser.parser("pcre_parser")
            # Load RE file
            Test0.load_file(ruleset_filename)
            # Parse RE and create NFA
            cn.create_by_parser(Test0)
            # Call the compute method
            cn.compute()
            # Get number of used LUTs and FFs
            logic = cn.report_logic()

            stop = time.time()

            report.write('%s %d %d %d %d %d %d %d %f\n' %
                         (ruleset_abc, stride, logic[0], logic[1], logic[2],
                          cn.get_state_num(),
                          len(cn.get_set_of_nondeterministic_states()),
Esempio n. 37
0
# Import used Netbench modules
from history_fa import HistoryFA
from netbench.pattern_match.parser import parser

# EXAMPLE of use for HistoryFA class
if __name__ == '__main__':
    print("-------------------------------------------------------------------")
    print("                  Example of use: History FA                       ")
    print("-------------------------------------------------------------------")
    print(" Ruleset: /ab.*cd/                                                 ")
    print(" Graphical representation: automaton.dot                           ")
    print("                           history.dot                             ")
    print("-------------------------------------------------------------------")
    
    # Create PCRE parser object
    par = parser("pcre_parser")
    # Set the ruleset
    par.set_text("/ab.*cd/")
    
    # Create HistoryFA object
    history = HistoryFA()
    # Parse the ruleset
    history.create_by_parser(par)
    # Remove epsilons
    history.remove_epsilons()
    # Get copy of NFA part
    NFA = history.get_automaton(True)
    # Determinise the automaton. Record the correspondence between DFA
    # states and NFA states
    history.determinise(create_table=True)
    # Create the History FA from the deterministic automaton and the
    # original NFA
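    history.compute(NFA)
    # (the listing is clipped here; given the banner above, the original
    # presumably finishes by writing automaton.dot and history.dot --
    # the exact export calls are not shown, so they are not reproduced)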