def mine_call_grammar(self, function_list=None, qualified=False):
    """Mine a grammar of function calls.

    Starts from `self.initial_grammar()`. For each function name,
    `self.mine_function_grammar()` yields a `(function_grammar,
    function_symbol)` pair; the symbol is added (once) to the expansions of
    `self.CALL_SYMBOL` and the function grammar is merged into the result.

    :param function_list: names of the functions to mine. If None, the names
        come from `self.carver.called_functions(qualified=qualified)`, and
        names starting with "_" or "<" are skipped.
    :param qualified: forwarded to `called_functions()`; only used when
        `function_list` is None.
    :return: the combined grammar (asserted valid via `is_valid_grammar`).
    :raises Exception: whatever `mine_function_grammar()` raises, but only
        when `function_list` was given explicitly. For the automatically
        determined list, failing functions are skipped.
    """
    grammar = self.initial_grammar()
    explicit_list = function_list is not None
    if explicit_list:
        fn_list = function_list
    else:
        fn_list = self.carver.called_functions(qualified=qualified)

    for function_name in fn_list:
        if not explicit_list and function_name.startswith(("_", "<")):
            continue  # Internal function

        try:
            function_grammar, function_symbol = self.mine_function_grammar(
                function_name, grammar)
        except Exception:
            if explicit_list:
                raise
            # Best effort for automatically collected functions: skip this
            # one. Without the `continue`, the code below would run with
            # unbound (first iteration) or stale (later iterations) values.
            continue

        if function_symbol not in grammar[self.CALL_SYMBOL]:
            grammar[self.CALL_SYMBOL].append(function_symbol)
        grammar.update(function_grammar)

    assert is_valid_grammar(grammar)
    return grammar
def mine_grammar(self):
    """Derive a query grammar from `self.action` and `self.fields`.

    Every field gets its own nonterminal expanding to `name=<value>`.
    A field whose type is a string refers to the nonterminal `<type>`
    (falling back to `<text>` if the grammar does not define it); any other
    field type is used directly as the list of expansions for a fresh
    `<name-value>` symbol. `<query>` joins all field symbols with "&".

    :return: the grammar with unreachable nonterminals removed.
    """
    grammar = extend_grammar(self.QUERY_GRAMMAR)
    grammar["<action>"] = [self.action]

    query_parts = []
    for name in self.fields:
        symbol = new_symbol(grammar, "<" + name + ">")
        kind = self.fields[name]
        query_parts.append(symbol)

        if not isinstance(kind, str):
            # List of values
            values_symbol = new_symbol(grammar, "<" + name + "-value>")
            grammar[symbol] = [name + "=" + values_symbol]
            grammar[values_symbol] = kind
            continue

        type_symbol = "<" + kind + ">"
        grammar[symbol] = [name + "=" + type_symbol]
        if type_symbol not in grammar:
            # Unknown type
            grammar[type_symbol] = ["<text>"]

    grammar["<query>"] = ["&".join(query_parts)]

    # Remove unused parts
    for unused in unreachable_nonterminals(grammar):
        del grammar[unused]

    assert is_valid_grammar(grammar)
    return grammar
def is_valid_probabilistic_grammar(grammar, start_symbol=START_SYMBOL):
    """Check `grammar` for validity, including its probability annotations.

    :param grammar: mapping from nonterminal to its list of expansions.
    :param start_symbol: start symbol handed to `is_valid_grammar()`.
    :return: False if `is_valid_grammar()` rejects the grammar, else True.
    """
    if is_valid_grammar(grammar, start_symbol):
        for symbol in grammar:
            # Called only for its checks; the result is discarded and any
            # exception it raises propagates to the caller.
            exp_probabilities(grammar[symbol], symbol)
        return True
    return False
def list_grammar(object_grammar, list_object_symbol=None):
    """Combine `LIST_GRAMMAR` with `object_grammar` into a grammar of lists.

    :param object_grammar: grammar describing a single list element.
    :param list_object_symbol: expansion used for `<list-object>`; defaults
        to the first expansion of the start symbol in `object_grammar`.
    :return: a new grammar whose start symbol expands to `<list>`.
    """
    combined = extend_grammar(LIST_GRAMMAR)

    element = list_object_symbol
    if element is None:
        # Default: Use the first expansion of <start> as list symbol
        element = object_grammar[START_SYMBOL][0]

    combined.update(object_grammar)
    # Applied after the merge so these two rules override object_grammar's.
    combined.update({
        START_SYMBOL: ["<list>"],
        "<list-object>": [element],
    })

    assert is_valid_grammar(combined)
    return combined
def mine_state_grammar(self, grammar=None, state_symbol=None):
    """Mine the grammar rule for the current GUI state.

    The actions returned by `self.mine_state_actions()` are classified by
    prefix: each "click" action becomes its own alternative leading to a
    fresh, unexplored state; "ignore" actions are dropped; all remaining
    actions (fill(), check()) are collected, one per line, into a single
    form alternative that ends with the "submit" action (if any) and also
    leads to a fresh, unexplored state. `self.FINAL_STATE` is always the
    last alternative.

    :param grammar: grammar to extend on top of `self.GUI_GRAMMAR`;
        defaults to an empty grammar.
    :param state_symbol: symbol to define; if None, a new state symbol is
        created.
    :return: the extended grammar with unreachable nonterminals removed.
    """
    # A fresh dict per call rather than a mutable default argument that
    # would be shared between calls.
    if grammar is None:
        grammar = {}
    grammar = extend_grammar(self.GUI_GRAMMAR, grammar)

    if state_symbol is None:
        state_symbol = self.new_state_symbol(grammar)
        # Placeholder entry; presumably keeps later new_state_symbol()
        # calls from handing out the same symbol again -- TODO confirm.
        grammar[state_symbol] = []

    alternatives = []
    form = ""
    submit = None

    for action in self.mine_state_actions():
        if action.startswith("submit"):
            submit = action  # the last submit action seen wins
        elif action.startswith("click"):
            link_target = self.new_state_symbol(grammar)
            grammar[link_target] = [self.UNEXPLORED_STATE]
            alternatives.append(action + '\n' + link_target)
        elif action.startswith("ignore"):
            pass
        else:  # fill(), check() actions
            if form:
                form += '\n'
            form += action

    # The submit action always comes last in the form sequence.
    if submit is not None:
        if form:
            form += '\n'
        form += submit

    if form:
        form_target = self.new_state_symbol(grammar)
        grammar[form_target] = [self.UNEXPLORED_STATE]
        alternatives.append(form + '\n' + form_target)

    alternatives.append(self.FINAL_STATE)
    grammar[state_symbol] = alternatives

    # Remove unused parts
    for nonterminal in unreachable_nonterminals(grammar):
        del grammar[nonterminal]

    assert is_valid_grammar(grammar)
    return grammar
def make_grammar(num_symbols=3, num_alts=3):
    """Build a grammar out of `num_symbols` rules produced by `make_rule()`.

    Each rule is created from the names of the rules made so far (without
    their surrounding angle brackets), the lowercase ASCII letters as
    terminals, and `num_alts`. The start symbol expands to the rule
    created last.

    :return: the grammar with unreachable nonterminals removed.
    """
    alphabet = list(string.ascii_lowercase)
    rules = {}
    last_name = None

    for _ in range(num_symbols):
        known_names = [key[1:-1] for key in rules]
        last_name, alternatives = make_rule(known_names, alphabet, num_alts)
        rules[last_name] = alternatives

    rules[START_SYMBOL] = [last_name]

    # Remove unused parts
    for unused in unreachable_nonterminals(rules):
        del rules[unused]

    assert is_valid_grammar(rules)
    return rules
else: from .Grammars import crange, is_valid_grammar ORDER_GRAMMAR = { "<start>": ["<order>"], "<order>": ["order?item=<item>&name=<name>&email=<email>&city=<city>&zip=<zip>"], "<item>": ["tshirt", "drill", "lockset"], "<name>": ["Jane Doe", "John Smith"], "<email>": ["*****@*****.**"], "<city>": ["Seattle", "New York"], "<zip>": ["<digit>" * 5], "<digit>": crange('0', '9') } assert is_valid_grammar(ORDER_GRAMMAR) BAD_ORDER_GRAMMAR = { "<name>": ["Robert'; drop table students; --"], # https://xkcd.com/327/ "<city>": ["Mötley Crüe"], } ... if __name__ == "__main__": time.sleep(5) http_process.terminate() import os if __name__ == "__main__": os.remove("httpd_port.txt")
# Use a plain import when run as a script (no package), a relative import
# when loaded as part of a package.
if __package__ is None or __package__ == "":
    from Grammars import crange, srange, convert_ebnf_grammar, is_valid_grammar, START_SYMBOL, new_symbol
else:
    from .Grammars import crange, srange, convert_ebnf_grammar, is_valid_grammar, START_SYMBOL, new_symbol

# Command-line grammar in EBNF notation: one operator flag followed by one
# or more space-separated integers.
PROCESS_NUMBERS_EBNF_GRAMMAR = {
    "<start>": ["<operator> <integers>"],
    "<operator>": ["--sum", "--min", "--max"],
    "<integers>": ["<integer>", "<integers> <integer>"],
    "<integer>": ["<digit>+"],
    "<digit>": crange('0', '9')
}
assert is_valid_grammar(PROCESS_NUMBERS_EBNF_GRAMMAR)

# Converted form of the EBNF grammar; this is what the fuzzer below uses.
PROCESS_NUMBERS_GRAMMAR = convert_ebnf_grammar(PROCESS_NUMBERS_EBNF_GRAMMAR)

if __package__ is None or __package__ == "":
    from GrammarCoverageFuzzer import GrammarCoverageFuzzer
else:
    from .GrammarCoverageFuzzer import GrammarCoverageFuzzer

# Demo (script mode only): print three fuzzed command lines.
if __name__ == "__main__":
    f = GrammarCoverageFuzzer(PROCESS_NUMBERS_GRAMMAR, min_nonterminals=10)
    for i in range(3):
        print(f.fuzz())
"<xml-openclose-tag>", "<xml-tree><xml-tree>" ], "<xml-open-tag>": ["<<id>>", "<<id> <xml-attribute>>"], "<xml-openclose-tag>": ["<<id>/>", "<<id> <xml-attribute>/>"], "<xml-close-tag>": ["</<id>>"], "<xml-attribute>": ["<id>=<id>", "<xml-attribute> <xml-attribute>"], "<id>": ["<letter>", "<id><letter>"], "<text>": ["<text><letter_space>", "<letter_space>"], "<letter>": srange(string.ascii_letters + string.digits + "\"" + "'" + "."), "<letter_space>": srange(string.ascii_letters + string.digits + "\"" + "'" + " " + "\t"), } if __name__ == "__main__": assert is_valid_grammar(XML_GRAMMAR) if __package__ is None or __package__ == "": from Parser import EarleyParser else: from .Parser import EarleyParser if __package__ is None or __package__ == "": from GrammarFuzzer import display_tree else: from .GrammarFuzzer import display_tree if __name__ == "__main__": parser = EarleyParser(XML_GRAMMAR, tokens=XML_TOKENS) for tree in parser.parse("<html>Text</html>"):
# Use a plain import when run as a script (no package), a relative import
# when loaded as part of a package.
if __package__ is None or __package__ == "":
    from Grammars import crange, srange, convert_ebnf_grammar, is_valid_grammar, START_SYMBOL, new_symbol
else:
    from .Grammars import crange, srange, convert_ebnf_grammar, is_valid_grammar, START_SYMBOL, new_symbol

# Command-line grammar in EBNF notation: one operator flag followed by one
# or more space-separated integers.
PROCESS_NUMBERS_EBNF_GRAMMAR = {
    "<start>": ["<operator> <integers>"],
    "<operator>": ["--sum", "--min", "--max"],
    "<integers>": ["<integer>", "<integers> <integer>"],
    "<integer>": ["<digit>+"],
    "<digit>": crange('0', '9')
}
assert is_valid_grammar(PROCESS_NUMBERS_EBNF_GRAMMAR)

# Converted form of the EBNF grammar; this is what the fuzzer below uses.
PROCESS_NUMBERS_GRAMMAR = convert_ebnf_grammar(PROCESS_NUMBERS_EBNF_GRAMMAR)

if __package__ is None or __package__ == "":
    from GrammarCoverageFuzzer import GrammarCoverageFuzzer
else:
    from .GrammarCoverageFuzzer import GrammarCoverageFuzzer

# Demo (script mode only): print three fuzzed command lines.
if __name__ == "__main__":
    f = GrammarCoverageFuzzer(PROCESS_NUMBERS_GRAMMAR, min_nonterminals=10)
    for i in range(3):
        print(f.fuzz())
# Grammar for a urlparse() call; here it only defines <call>. The <url>
# rules and the start symbol are added below, and only in script mode.
URLPARSE_GRAMMAR = {
    "<call>": ['urlparse("<url>")']
}

# Merge in the definitions from URL_GRAMMAR
if __name__ == "__main__":
    URLPARSE_GRAMMAR.update(URL_GRAMMAR)

# Make <call> the start of the grammar
if __name__ == "__main__":
    URLPARSE_GRAMMAR["<start>"] = ["<call>"]

if __name__ == "__main__":
    assert is_valid_grammar(URLPARSE_GRAMMAR)

# Bare expression: has no effect when run as a script
if __name__ == "__main__":
    URLPARSE_GRAMMAR

# Produce one fuzzed call string (the result is discarded)
if __name__ == "__main__":
    urlparse_fuzzer = GrammarFuzzer(URLPARSE_GRAMMAR)
    urlparse_fuzzer.fuzz()

if __name__ == "__main__":
    # Call function_name(arg[0], arg[1], ...) as a string
    def do_call(call_string):
        print(call_string)
from Grammars import opts, exp_opt, exp_string, crange else: from .Grammars import opts, exp_opt, exp_string, crange CHARGE_GRAMMAR = { "<start>": ["Charge <amount> to my credit card <credit-card-number>"], "<amount>": ["$<float>"], "<float>": ["<integer>.<digit><digit>"], "<integer>": ["<digit>", "<integer><digit>"], "<digit>": crange('0', '9'), "<credit-card-number>": ["<digits>"], "<digits>": ["<digit-block><digit-block><digit-block><digit-block>"], "<digit-block>": ["<digit><digit><digit><digit>"], } assert is_valid_grammar(CHARGE_GRAMMAR) if __package__ is None or __package__ == "": from GrammarFuzzer import GrammarFuzzer, all_terminals, display_tree else: from .GrammarFuzzer import GrammarFuzzer, all_terminals, display_tree if __name__ == "__main__": g = GrammarFuzzer(CHARGE_GRAMMAR) [g.fuzz() for i in range(5)] # ## Attaching Functions to Expansions if __name__ == "__main__": print('\n## Attaching Functions to Expansions')
def check_grammar(self):
    """Assert that the start symbol is defined and the grammar is valid.

    Validity is checked by `is_valid_grammar()` with this object's start
    symbol and the options reported by `self.supported_opts()`.

    :raises AssertionError: if either check fails.
    """
    start = self.start_symbol
    assert start in self.grammar
    assert is_valid_grammar(
        self.grammar,
        start_symbol=start,
        supported_opts=self.supported_opts(),
    )
# Query the carver for the recorded arguments of "power" (result discarded)
if __name__ == "__main__":
    power_carver.arguments("power")

# Use a plain import when run as a script (no package), a relative import
# when loaded as part of a package.
if __package__ is None or __package__ == "":
    from Grammars import START_SYMBOL, is_valid_grammar, new_symbol, extend_grammar
else:
    from .Grammars import START_SYMBOL, is_valid_grammar, new_symbol, extend_grammar

# Hand-written grammar for power() calls: two choices for each argument.
POWER_GRAMMAR = {
    "<start>": ["power(<x>, <y>)"],
    "<x>": ["1", "3"],
    "<y>": ["2", "4"]
}
assert is_valid_grammar(POWER_GRAMMAR)

if __package__ is None or __package__ == "":
    from GrammarCoverageFuzzer import GrammarCoverageFuzzer
else:
    from .GrammarCoverageFuzzer import GrammarCoverageFuzzer

# Demo (script mode only): produce five fuzzed calls; the list is discarded.
if __name__ == "__main__":
    power_fuzzer = GrammarCoverageFuzzer(POWER_GRAMMAR)
    [power_fuzzer.fuzz() for i in range(5)]

# ### A Grammar Miner for Calls
if __name__ == "__main__":
    print('\n### A Grammar Miner for Calls')
if __package__ is None or __package__ == "": from Grammars import is_valid_grammar, crange, convert_ebnf_grammar # minor dependency else: from .Grammars import is_valid_grammar, crange, convert_ebnf_grammar # minor dependency SUM2_EBNF_GRAMMAR = { "<start>": ["<sum2>"], "<sum2>": ["sum2(<int>, <int>)"], "<int>": ["<_int>"], "<_int>": ["(-)?<leaddigit><digit>*", "0"], "<leaddigit>": crange('1', '9'), "<digit>": crange('0', '9') } assert is_valid_grammar(SUM2_EBNF_GRAMMAR) if __name__ == "__main__": sum2_grammar = convert_ebnf_grammar(SUM2_EBNF_GRAMMAR) if __name__ == "__main__": sum2_fuzzer = GrammarFuzzer(sum2_grammar) [sum2_fuzzer.fuzz() for i in range(10)] if __name__ == "__main__": with InvariantAnnotator() as annotator: for i in range(10): eval(sum2_fuzzer.fuzz()) print_content(annotator.function_with_invariants('sum2'), '.py')
if __name__ == "__main__":
    print('\n## Synthesizing Code')

# A call expressed as a string of Python code...
if __name__ == "__main__":
    call = "urlparse('http://www.example.com/')"

# ...which eval() executes (result discarded)
if __name__ == "__main__":
    eval(call)

# Grammar for urlparse() calls: <call> wraps a <url> in a call expression.
URLPARSE_GRAMMAR = {"<call>": ['urlparse("<url>")']}

# Import definitions from URL_GRAMMAR
URLPARSE_GRAMMAR.update(URL_GRAMMAR)

# Make <call> the start of the grammar
URLPARSE_GRAMMAR["<start>"] = ["<call>"]

assert is_valid_grammar(URLPARSE_GRAMMAR)

# Bare expression: has no effect when run as a script
if __name__ == "__main__":
    URLPARSE_GRAMMAR

# Produce one fuzzed call string (the result is discarded)
if __name__ == "__main__":
    urlparse_fuzzer = GrammarFuzzer(URLPARSE_GRAMMAR)
    urlparse_fuzzer.fuzz()

if __name__ == "__main__":
    # Call function_name(arg[0], arg[1], ...) as a string
    def do_call(call_string):
        # Echo the call, evaluate it, echo the repr of the result, and
        # return the result.
        print(call_string)
        # NOTE(review): eval() runs call_string as arbitrary Python code;
        # only pass strings from a trusted source.
        result = eval(call_string)
        print("\t= " + repr(result))
        return result
# Variant of EXPR_GRAMMAR in which the two integers of "<integer-1>.<integer-2>"
# get their own integer and digit rules, separate from <integer>.
if __name__ == "__main__":
    dup_expr_grammar = extend_grammar(
        EXPR_GRAMMAR,
        {
            "<factor>": [
                "+<factor>", "-<factor>", "(<expr>)",
                "<integer-1>.<integer-2>", "<integer>"
            ],
            "<integer-1>": ["<digit-1><integer-1>", "<digit-1>"],
            "<integer-2>": ["<digit-2><integer-2>", "<digit-2>"],
            "<digit-1>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
            "<digit-2>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
        })

if __name__ == "__main__":
    assert is_valid_grammar(dup_expr_grammar)

# Print ten strings derived from <factor>
if __name__ == "__main__":
    f = GrammarCoverageFuzzer(dup_expr_grammar, start_symbol="<factor>")
    for i in range(10):
        print(f.fuzz())

# ### Extending Grammars for Context Coverage Programmatically
if __name__ == "__main__":
    print('\n### Extending Grammars for Context Coverage Programmatically')

# Use a plain import when run as a script (no package), a relative import
# when loaded as part of a package.
if __package__ is None or __package__ == "":
    from Grammars import new_symbol, unreachable_nonterminals
else:
    from .Grammars import new_symbol, unreachable_nonterminals
if __name__ == "__main__":
    print('\n### Exercise 1: Testing ls')

# First version: a "-" followed by any number of options (EBNF notation),
# with only a few sample options listed.
LS_EBNF_GRAMMAR = {
    '<start>': ['-<options>'],
    '<options>': ['<option>*'],
    '<option>': ['1', 'A', '@',
                 # many more
                 ]
}
assert is_valid_grammar(LS_EBNF_GRAMMAR)

# Use a plain import when run as a script (no package), a relative import
# when loaded as part of a package.
if __package__ is None or __package__ == "":
    from Grammars import convert_ebnf_grammar, srange
else:
    from .Grammars import convert_ebnf_grammar, srange

# Second version: rebinds LS_EBNF_GRAMMAR, taking the options from the
# characters of a single string via srange().
LS_EBNF_GRAMMAR = {
    '<start>': ['-<options>'],
    '<options>': ['<option>*'],
    '<option>': srange("ABCFGHLOPRSTUW@abcdefghiklmnopqrstuwx1")
}
assert is_valid_grammar(LS_EBNF_GRAMMAR)

# Converted form of the (second) EBNF grammar
LS_GRAMMAR = convert_ebnf_grammar(LS_EBNF_GRAMMAR)
("3", opts(prob=0.125)), ("4", opts(prob=0.097)), ("5", opts(prob=0.079)), ("6", opts(prob=0.067)), ("7", opts(prob=0.058)), ("8", opts(prob=0.051)), ("9", opts(prob=0.046)), ], # Remaining digits are equally distributed "<integer>": ["<digit><integer>", "<digit>"], "<digit>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], } if __name__ == "__main__": assert is_valid_grammar(PROBABILISTIC_EXPR_GRAMMAR, supported_opts={'prob'}) if __name__ == "__main__": PROBABILISTIC_EXPR_GRAMMAR["<leaddigit>"] if __name__ == "__main__": leaddigit_expansion = PROBABILISTIC_EXPR_GRAMMAR["<leaddigit>"][0] leaddigit_expansion if __name__ == "__main__": exp_string(leaddigit_expansion) def exp_prob(expansion): """Return the options of an expansion""" return exp_opt(expansion, 'prob')
("5", opts(prob=0.079)), ("6", opts(prob=0.067)), ("7", opts(prob=0.058)), ("8", opts(prob=0.051)), ("9", opts(prob=0.046)), ], # Remaining digits are equally distributed "<integer>": ["<digit><integer>", "<digit>"], "<digit>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], } assert is_valid_grammar(PROBABILISTIC_EXPR_GRAMMAR) if __name__ == "__main__": f = GrammarFuzzer(PROBABILISTIC_EXPR_GRAMMAR) f.fuzz() # ## _Section 4_ if __name__ == "__main__": print('\n## _Section 4_') # ## Lessons Learned
("3", opts(prob=0.125)), ("4", opts(prob=0.097)), ("5", opts(prob=0.079)), ("6", opts(prob=0.067)), ("7", opts(prob=0.058)), ("8", opts(prob=0.051)), ("9", opts(prob=0.046)), ], # Remaining digits are equally distributed "<integer>": ["<digit><integer>", "<digit>"], "<digit>": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"], } if __name__ == "__main__": assert is_valid_grammar(PROBABILISTIC_EXPR_GRAMMAR) if __name__ == "__main__": PROBABILISTIC_EXPR_GRAMMAR["<leaddigit>"] def exp_string(expansion): """Return the string to be expanded""" if isinstance(expansion, str): return expansion return expansion[0] if __name__ == "__main__": leaddigit_expansion = PROBABILISTIC_EXPR_GRAMMAR["<leaddigit>"][0] leaddigit_expansion