def test_first_nt(self):
    """Check ``first_nonterminal`` against expected sets for two grammars."""
    cases = [
        (
            {
                'S': {'A b', 'A B c'},
                'B': {'b B', 'A d', '&'},
                'A': {'a A', '&'},
            },
            [
                ('S', {'A', 'B'}),
                ('A', {'&'}),
                ('B', {'A', '&'}),
            ],
        ),
        (
            {
                'S': {'A B C'},
                'A': {'a A', '&'},
                'B': {'b B', 'A C d'},
                'C': {'c C', '&'},
            },
            [
                ('S', {'A', 'B', 'C'}),
                ('A', {'&'}),
                ('B', {'A', 'C'}),
                ('C', {'&'}),
            ],
        ),
    ]
    for productions, expectations in cases:
        grammar = CFG.create(initial_symbol='S', productions=productions)
        for symbol, expected in expectations:
            self.assertSetEqual(expected, grammar.first_nonterminal(symbol))
def test_without_infertile(self):
    """Check ``without_infertile`` on one grammar it prunes and one it keeps."""
    # 'A' only ever produces 'a A', so the expected result keeps just S -> a.
    pruned = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'a A', 'a'},
            'A': {'a A'},
        },
    ).without_infertile()
    self.assertEqual('S', pruned.initial_symbol)
    self.assertDictEqual({'S': {'a'}}, pruned.productions)

    # Here the expected productions are the same as the input productions.
    kept = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'A a A b'},
            'A': {'c', '&'},
        },
    ).without_infertile()
    expected = {
        'S': {'A a A b'},
        'A': {'c', '&'},
    }
    self.assertEqual(expected, kept.productions)
def test_load(self):
    """Check ``CFG.load`` on a valid grammar, empty input and a rule with no body."""
    # NOTE(review): the line breaks inside the grammar literals were
    # reconstructed (one rule per line) -- confirm against the original file.
    grammar_text = """
        E -> T E'
        E' -> + T E' | &
        T -> F T'
        T' -> * F T' | &
        F -> ( E ) | id
    """
    grammar = CFG.load(io.StringIO(grammar_text))
    self.assertEqual('E', grammar.initial_symbol)
    expected_productions = {
        'E': {"T E'"},
        "E'": {"+ T E'", '&'},
        'T': {"F T'"},
        "T'": {"* F T'", '&'},
        'F': {'( E )', 'id'}
    }
    self.assertDictEqual(expected_productions, grammar.productions)

    # Empty input must raise ValueError.
    empty = io.StringIO('')
    with self.assertRaises(ValueError):
        CFG.load(empty)

    # So must a rule with nothing after the arrow.
    incomplete = io.StringIO("""
        S ->
    """)
    with self.assertRaises(ValueError):
        CFG.load(incomplete)
def test_epsilon_free(self):
    """Check ``epsilon_free`` on a nullable nonterminal and on S -> &."""
    # 'A' can derive '&'; the expected result lists S with each A kept or dropped.
    result = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'A a A b'},
            'A': {'c', '&'},
        },
    ).epsilon_free()
    self.assertEqual('S', result.initial_symbol)
    expected = {
        'S': {'a A b', 'A a b', 'a b', 'A a A b'},
        'A': {'c'},
    }
    self.assertDictEqual(expected, result.productions)

    # With only S -> &, the expected result has a new initial symbol S'.
    result = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'&'},
        },
    ).epsilon_free()
    self.assertEqual("S'", result.initial_symbol)
    self.assertDictEqual({"S'": {'S', '&'}}, result.productions)
def testOut(self):
    """Check the CFG path produced for an ``__out__`` call with two literals."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')

    # Two int-typed literal arguments: 5 and 1.
    literals = []
    for value in (5, 1):
        literal = Numeral(value)
        literal.type = int_type
        literals.append(literal)

    call = Operation(FunctionCall(Name('__out__'), literals))
    cfg.connect(cfg.entry, call, cfg.exit)

    function.cfg = cfg
    function.symbol_table = SymbolTable()
    cfg.symbol_table = SymbolTable(function.symbol_table)

    self.assertSuccess(Program([function]))

    # Expected path: $t0 = 5, $t1 = 1, then the call on the temporaries.
    expected_path = [
        function.cfg.entry,
        Operation(BinaryOperation([Name('$t0'), '=', Numeral(5)])),
        Operation(BinaryOperation([Name('$t1'), '=', Numeral(1)])),
        Operation(FunctionCall(Name('__out__'), [Name('$t0'), Name('$t1')])),
        function.cfg.exit,
    ]
    self.assertTrue(function.cfg.has_path(*expected_path))
def cfg_filter(self, arg=None):
    '''
    CFG filtering: select a pair of functions and store it in
    self.candidate_func when the two CFGs are not the same.

    ``arg`` selects the pair: None takes the next pair from
    self.func_generator, an int or str is passed to
    self.binfile.get_func_graphs, and a list starting with a FuncGraph is
    used directly as [origin, patch]. Any other value leaves self.cur_func
    as it was.
    '''
    if arg is None:
        # Default: fetch the next pair of candidate functions.
        pair = self.func_generator.next()
        self.cur_func['origin'], self.cur_func['patch'] = pair
    elif isinstance(arg, (int, str)):
        # Fetch the requested candidate functions by address or by name.
        pair = self.binfile.get_func_graphs(arg)
        self.cur_func['origin'], self.cur_func['patch'] = pair
    elif isinstance(arg, list) and isinstance(arg[0], FuncGraph):
        # The candidate functions were passed in directly.
        self.cur_func['origin'], self.cur_func['patch'] = arg[0], arg[1]

    for side in ('origin', 'patch'):
        self.cfg[side] = CFG(self.cur_func[side])

    origin_cfg = self.cfg['origin']
    funcname = origin_cfg.funcname
    self.func_cfg_centroid[funcname] = {
        'origin': self.cfg['origin'].get_centroid(),
        'patch': self.cfg['patch'].get_centroid()
    }

    if self.cfg['origin'].same_with(self.cfg['patch']):
        return
    # The CFGs differ: record both addresses under the function name.
    self.candidate_func[funcname] = {
        'origin': self.cfg['origin'].address,
        'patch': self.cfg['patch'].address
    }
def test_ssa_complex_2(self):
    """Convert the complex2 fixture to SSA and print its blocks, last first."""
    graph = CFG(complex2_cfg_blocks)
    graph.convert_to_ssa()
    # No assertions: the block dump is for manual inspection.
    blocks_reversed = graph.blocks[::-1]
    for block in blocks_reversed:
        print(block.name)
        print(block)
        print()
def visit_FunctionDecl(self, func):
    """Create a CFG for ``func``, attach it as ``func.cfg`` and fill it from the body."""
    graph = CFG(func.name)
    func.cfg = graph

    if not hasattr(func, 'symbol_table'):
        graph.symbol_table = SymbolTable()
    else:
        # New table under the function table's parent, then embed the
        # function's own table into it.
        graph.symbol_table = SymbolTable(func.symbol_table.parent)
        graph.symbol_table.embed(func.symbol_table)

    last_node = self.visit(func.body, cfg=graph, entry=graph.entry,
                           exit=graph.exit)
    if last_node is None:
        return
    # The body visit returned a node: connect it to the exit.
    graph.connect(last_node, graph.exit)
def test_insert_phis_complex_1(self):
    """Check ``insert_phis`` on complex1: one phi, on block 4, writing r3."""
    graph = CFG(complex1_cfg_blocks)
    self.assertEqual(graph.insert_phis(), 1)

    # Blocks 1-3 must have no phis.
    phi_free_blocks = [c1_block1, c1_block2, c1_block3]
    for insn in (i for blk in phi_free_blocks for i in blk.insns):
        self.assertEqual(insn.num_phis(), 0)

    first_insn = c1_block4.insns[0]
    self.assertEqual(first_insn.num_phis(), 1)
    self.assertEqual(first_insn.pcode[0].output, r3)
def test_insert_phis_complex_2(self):
    """Check ``insert_phis`` on complex2: two phis, on block 4, writing r3 and r1."""
    graph = CFG(complex2_cfg_blocks)
    self.assertEqual(graph.insert_phis(), 2)

    # Blocks 1, 2, 3 and 5 must have no phis.
    phi_free_blocks = [c2_block1, c2_block2, c2_block3, c2_block5]
    for insn in (i for blk in phi_free_blocks for i in blk.insns):
        self.assertEqual(insn.num_phis(), 0)

    first_insn = c2_block4.insns[0]
    self.assertEqual(first_insn.num_phis(), 2)
    phi_outputs = {op.output for op in first_insn.pcode if op.is_phi()}
    self.assertEqual(phi_outputs, {r3, r1})
def update_grammar(self):
    '''Reload the grammar from the editor text, then update the UI.

    If the grammar cannot be loaded, the traceback is printed, a message is
    shown in the status bar and nothing else changes.'''
    try:
        source_lines = self.editor.toPlainText().splitlines()
        self.grammar = CFG.load(source_lines)
    except Exception as err:
        traceback.print_tb(err.__traceback__)
        self.window.statusBar().showMessage(
            'Failed to generate grammar. Check your syntax.')
        return

    self.window.statusBar().showMessage('Done.')
    widget_states = (
        (self.make_proper_item, True),
        (self.parse_table_item, True),
        (self.run_grammar_btn, False),
    )
    for widget, enabled in widget_states:
        widget.setEnabled(enabled)

    try:
        self.update_tables()
    except RecursionError:
        self.window.statusBar().showMessage(
            'Failed to generate grammar tables: Recursion depth overflow.')
def execute_test(self, file, filepath, capsys):
    """Parse ``filepath`` and compare AST, CFG and symbol table with expected files.

    The expected text is read from ``<file>.ast``, ``<file>.cfg`` and
    ``<file>.sym`` under ``TestSymtab.tests_out``. Each comparison prepends
    the captured stderr to the produced text and strips both sides.

    Args:
        file: Base name of the expected-output files.
        filepath: Path of the source file to parse.
        capsys: Capture object; ``readouterr().err`` supplies the stderr text.
    """
    def read_expected(extension):
        # Close the file deterministically instead of leaking the handle.
        path = os.path.join(TestSymtab.tests_out, file + extension)
        with open(path) as expected:
            return expected.read()

    lexer = APLLexer()
    lexer.build()
    parser = APLYacc(output=YaccOutput.AST)
    parser.build(lexer)
    with open(filepath) as source:
        ast = parser.parse(source.read())

    ast_str = str(ast) if ast else ''
    output = read_expected(".ast")
    # Read stderr once; the same text prefixes all three comparisons.
    errout = capsys.readouterr().err
    assert output.strip() == (errout + ast_str).strip()

    cfg_str = str(CFG(ast)) if ast else ''
    output = read_expected(".cfg")
    assert output.strip() == (errout + cfg_str).strip()

    symtab_str = symtab_from_ast(parser, ast) if ast else ''
    output = read_expected(".sym")
    assert output.strip() == (errout + symtab_str).strip()
def test_parse_table(self):
    """Check ``parse_table`` for the expression grammar, entry by entry."""
    grammar = CFG.create(
        initial_symbol='E',
        productions={
            'E': {"T E'"},
            "E'": {"+ T E'", '&'},
            'T': {"F T'"},
            "T'": {"* F T'", '&'},
            'F': {'( E )', 'id'}
        },
    )
    # Keys are (nonterminal, lookahead); values are the production to apply.
    expected_table = {
        ('E', 'id'): "T E'",
        ('E', '('): "T E'",
        ("E'", ')'): '&',
        ("E'", '+'): "+ T E'",
        ("E'", '$'): '&',
        ('T', 'id'): "F T'",
        ('T', '('): "F T'",
        ("T'", ')'): '&',
        ("T'", '+'): '&',
        ("T'", '*'): "* F T'",
        ("T'", '$'): '&',
        ('F', 'id'): 'id',
        ('F', '('): '( E )',
    }
    self.assertDictEqual(expected_table, grammar.parse_table())
def test_parse(self):
    """Check every tuple yielded by ``parse('id + id')``, then exhaustion."""
    grammar = CFG.create(
        initial_symbol='E',
        productions={
            'E': {"T E'"},
            "E'": {"+ T E'", '&'},
            'T': {"F T'"},
            "T'": {"* F T'", '&'},
            'F': {'( E )', 'id'}
        },
    )
    steps = grammar.parse('id + id')

    # Each expected step is a pair of lists, compared in yield order.
    expected_steps = [
        (['id', '+', 'id'], ['E']),
        (['id', '+', 'id'], ["E'", 'T']),
        (['id', '+', 'id'], ["E'", "T'", 'F']),
        (['id', '+', 'id'], ["E'", "T'", 'id']),
        (['+', 'id'], ["E'", "T'"]),
        (['+', 'id'], ["E'"]),
        (['+', 'id'], ["E'", 'T', '+']),
        (['id'], ["E'", 'T']),
        (['id'], ["E'", "T'", 'F']),
        (['id'], ["E'", "T'", 'id']),
        ([], ["E'", "T'"]),
        ([], ["E'"]),
        ([], []),
    ]
    for expected in expected_steps:
        self.assertTupleEqual(expected, next(steps))

    # After the last step the iterator must be exhausted.
    with self.assertRaises(StopIteration):
        next(steps)
def intersect(cfg, nfa):
    """Build a grammar from ``cfg`` and ``nfa`` using (state, nonterminal, state) triples.

    The result has one nonterminal for every triple ``(p, A, q)``, where
    ``A`` is a nonterminal of ``cfg`` and ``p``, ``q`` are states of ``nfa``.
    For each production ``A -> X1 ... Xn`` and each state sequence
    ``q0 ... qn`` a production ``(q0, A, qn) -> ...`` is added: a nonterminal
    ``Xi`` becomes the triple ``(q(i-1), Xi, qi)``, and a terminal ``Xi`` is
    kept only when ``nfa`` has a transition from ``q(i-1)`` to ``qi`` on
    ``Xi`` or on ``WILDCARD``.

    Args:
        cfg: Grammar exposing ``nonterms`` (indexable), ``start`` and
            ``productions`` (mapping of left-hand side to right-hand sides).
        nfa: Automaton exposing ``states``, ``start``, ``final`` and
            ``transitions`` (nested mapping state -> symbol -> states).

    Returns:
        A new ``CFG`` whose start symbol is the triple
        ``(nfa.start, cfg.start, nfa.final)``.
    """
    ret = CFG()
    Nt = cfg.nonterms
    St = list(nfa.states)
    # One nonterminal per triple. Presumably a fresh CFG() already owns one
    # nonterminal, hence the "- 1" -- TODO confirm against CFG.
    for _ in range(len(Nt) * len(St)**2 - 1):
        ret.addNonterm()
    NtInv = {nonterm: i for i, nonterm in enumerate(Nt)}
    StInv = {state: i for i, state in enumerate(St)}

    def getNterm(p, A, q):
        # Flatten (A, p, q) into an index into ret.nonterms.
        idx = (NtInv[A]*len(St) + StInv[p])*len(St) + StInv[q]
        return ret.nonterms[idx]

    ret.start = getNterm(nfa.start, cfg.start, nfa.final)
    for lhs, rhsSet in cfg.productions.items():
        for rhs in rhsSet:
            # Item = (first state, right-hand side built so far, current state).
            items = [(x, [], x) for x in St]
            for term in rhs:
                newItems = []
                if term in NtInv:
                    # A nonterminal may take the automaton from y to any z.
                    for z in St:
                        newItems.extend([(x, R + [getNterm(y, term, z)], z)
                                         for (x, R, y) in items])
                else:
                    for (x, R, y) in items:
                        # Fix: transitions leave the CURRENT state y. The
                        # original looked them up from the first state x,
                        # which is only right for the first symbol.
                        outgoing = nfa.transitions.get(y, {})
                        newItems.extend([(x, R + [term], z)
                                         for z in outgoing.get(term, [])])
                        newItems.extend([(x, R + [term], z)
                                         for z in outgoing.get(WILDCARD, [])])
                items = newItems
            for (p, R, q) in items:
                ret.addProduction(getNterm(p, lhs, q), R)
    return ret
def test_is_ll1(self):
    """Check that ``is_ll1`` is False for S -> a S a | b S b | a | b."""
    # 'a S a' and 'a' (and likewise the 'b' pair) start with the same symbol.
    productions = {'S': {'a S a', 'b S b', 'a', 'b'}}
    grammar = CFG.create(initial_symbol='S', productions=productions)
    self.assertFalse(grammar.is_ll1())
def test_nonterminals(self):
    """Check that ``nonterminals`` is exactly the set of left-hand sides."""
    productions = {
        'S': {'a A', '&'},
        'A': {'b S'},
    }
    grammar = CFG.create(initial_symbol='S', productions=productions)
    self.assertSetEqual({'S', 'A'}, grammar.nonterminals)
def testOut(self):
    """Check the CFG path produced for ``__out__(5, 1)``."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')

    # Both arguments are int-typed numerals.
    five = Numeral(5)
    five.type = int_type
    one = Numeral(1)
    one.type = int_type

    out_call = Operation(FunctionCall(Name('__out__'), [five, one]))
    cfg.connect(cfg.entry, out_call, cfg.exit)
    function.cfg = cfg
    function.symbol_table = SymbolTable()
    cfg.symbol_table = SymbolTable(function.symbol_table)

    program = Program([function])
    self.assertSuccess(program)

    # Expected: entry, $t0 = 5, $t1 = 1, __out__($t0, $t1), exit.
    path_found = function.cfg.has_path(
        function.cfg.entry,
        Operation(BinaryOperation([Name('$t0'), '=', Numeral(5)])),
        Operation(BinaryOperation([Name('$t1'), '=', Numeral(1)])),
        Operation(FunctionCall(Name('__out__'), [Name('$t0'), Name('$t1')])),
        function.cfg.exit,
    )
    self.assertTrue(path_found)
def testEmpty(self):
    """Check the lines produced for a CFG whose entry connects straight to exit."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')
    cfg.connect(cfg.entry, cfg.exit)
    function.cfg = cfg
    program = Program([function])
    linearise = self.assertSuccess(program)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        linearise.lines,
        [Label('f', public=True), Label('f$exit', public=True)])
def p_code(self, p):
    'code : VOID MAIN LPAREN RPAREN block'
    # NOTE(review): the string above presumably is the parser's grammar rule
    # (yacc-style action docstring); it is kept verbatim -- confirm.
    # Writes every node of the block to ast_file, then the CFG built from the
    # block to cfg_file, and closes both files.
    body = p[5]
    logging.debug('body: %s' % (body))

    ast_out = self.ast_file
    for node in body:
        ast_out.write(str(node))
    ast_out.close()

    cfg_out = self.cfg_file
    cfg_out.write(str(CFG(body)))
    cfg_out.close()
def testOneStatement(self):
    """Check the lines produced for one Operation between entry and exit."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')
    stmt = cfg.add(Operation(42))
    cfg.connect(cfg.entry, stmt)
    cfg.connect(stmt, cfg.exit)
    function.cfg = cfg
    program = Program([function])
    linearise = self.assertSuccess(program)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(linearise.lines, [
        Label('f', public=True),
        Instruction(42),
        Label('f$exit', public=True)
    ])
def testInfiniteLoop(self):
    """Check the lines produced for a node connected back to itself."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')
    node = cfg.add(Operation(42))
    # entry -> node -> node: the node is its own successor.
    cfg.connect(cfg.entry, node, node)
    function.cfg = cfg
    program = Program([function])
    linearise = self.assertSuccess(program)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(linearise.lines, [
        Label('f', public=True),
        Label(2),
        Instruction(42),
        Jump(2),
        Label('f$exit', public=True)
    ])
def p_code(self, p):
    'code : global_statement_list'
    # NOTE(review): the string above presumably is the parser's grammar rule
    # (yacc-style action docstring); it is kept verbatim -- confirm.
    # Writes every node to ast_file and the CFG to cfg_file, closes both,
    # then prints procedures and variables of the top table to sym_file.
    statements = p[1]
    logging.debug('code: %s' % (statements))

    ast_out = self.ast_file
    for node in statements:
        ast_out.write(str(node))
    ast_out.close()

    cfg_out = self.cfg_file
    cfg_out.write(str(CFG(statements)))
    cfg_out.close()

    print_procedures(self.tableptr.top(), self.sym_file)
    print_variables(self.tableptr.top(), self.sym_file)
def __init__(self, samplesFile, batchSize, binSize, stdDevThreshold,
             windowSize, recurrentThreshold):
    """Store the builder's parameters and create its helper objects.

    All six arguments are stored under attributes of the same name;
    ``binSize`` and ``stdDevThreshold`` also configure ``Statistics``, and
    ``samplesFile`` is also passed to ``InputFileBuffer``.
    """
    CFGBuilder.__init__(self)

    # Helper objects, created in the same order as before.
    self.stat = Statistics(binSize, stdDevThreshold)
    self.ifb = InputFileBuffer(50000, samplesFile)

    # Parameters kept as given.
    self.batchSize = batchSize
    self.binSize = binSize
    self.stdDevThreshold = stdDevThreshold
    self.samplesFile = samplesFile
    self.windowSize = windowSize
    self.recurrentThreshold = recurrentThreshold

    # Build state.
    self.targets = {}
    self.bbr = BBRepository()
    self.cfg = CFG()

    # Counters, all starting at zero.
    self.numMerge = 0
    self.highStdevEdges = 0
    self.numHighStdevTries = 0
    self.numHighStdevOK = 0
def parse_cfg(self):
    """Read from the stream, return a CFG object.

    This CFG object will be used to run the tasks.

    The section returned by ``self.read_section()`` is read as: line 1 the
    comma-separated variables, line 2 the comma-separated terminals, line 3
    the start symbol (passed on unchanged), and every remaining line one
    rule of the form ``V -> production``.

    Returns:
        ``CFG(variables, terminals, start, rules)``, where ``rules`` is a
        list of ``(variable, [symbol, ...])`` tuples.

    Raises:
        StopIteration: if the section has fewer than three lines.
        ValueError: if a rule line contains no ``->``.
    """
    lines = self.read_section()
    it = iter(lines)
    # variables and terminals are comma separated; all whitespace is dropped
    # (raw strings avoid the invalid-escape warning that '\s' triggers)
    vas = re.sub(r'\s', '', next(it)).split(',')
    tes = re.sub(r'\s', '', next(it)).split(',')
    start = next(it)
    # the remaining lines are rules V -> production
    rules = []
    for line in it:
        # Split on the first arrow only, so '->' may occur in the production.
        v, production = line.split('->', 1)
        # Split on runs of whitespace so repeated spaces or tabs do not
        # create empty symbols. An empty production still yields [''].
        symbols = re.split(r'\s+', production.strip())
        rules.append((v.strip(), symbols))
    return CFG(vas, tes, start, rules)
def testTest(self):
    """Check the lines produced for a Test node with true and false branches."""
    function = FunctionDecl(void_type, 'f', [], Block([]))
    cfg = CFG('f')
    test_node = cfg.add(Test(42))
    cfg.connect(cfg.entry, test_node)
    # Both branches lead to the exit: true runs 100, false runs 200.
    true_node = cfg.add(Operation(100))
    cfg.connect(test_node, TrueEdge(), true_node, cfg.exit)
    false_node = cfg.add(Operation(200))
    cfg.connect(test_node, FalseEdge(), false_node, cfg.exit)
    function.cfg = cfg
    program = Program([function])
    linearise = self.assertSuccess(program)
    # assertEqual replaces the deprecated alias assertEquals, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(linearise.lines, [
        Label('f', public=True),
        Branch(42, 3),
        Label(4),
        Instruction(200),
        Jump('f$exit'),
        Label(3),
        Instruction(100),
        Label('f$exit', public=True)
    ])
import csv
import random
import sys
from cfg import CFG

# Grammar of candidate programs: S is an expression over x and y, B is a
# boolean condition used by the conditional form of S.
cfg1 = CFG()
cfg1.add_prod('S', 'x|y|( S + S )|( S if B else S )')
cfg1.add_prod('B', ' ( S < S )|( S == S )|( S > S )|True|False')

if __name__ == '__main__':
    # argv[1] is a CSV file; the first three columns of each row are kept
    # as (value for x, value for y, expected result).
    f = open(sys.argv[1])
    reader = csv.reader(f, delimiter=',')
    eg = [(r[0], r[1], r[2]) for r in reader]
    progs_done = 0
    OUTPUT = False
    # NOTE(review): in the visible code this loop never ends, progs_done is
    # never incremented and `correct` is never read -- the rest of the script
    # appears to be cut off; confirm against the full file.
    while True:
        p = cfg1.get_next_prog()
        if (progs_done % 1000) == 0:
            OUTPUT = True
        if (OUTPUT):
            print("{} Programs evaluated".format(progs_done))
            print("Evaluating program {}".format(p))
        correct = 0
        for e in eg:
            if (OUTPUT):
                print(" Evaluating example {}".format(e))
            # Substitute the example's inputs textually for x and y.
            p_inst = p.replace('x', e[0])
            p_inst = p_inst.replace('y', e[1])
            # NOTE(review): eval() runs text built from the CSV contents;
            # only use this with trusted input files.
            if (OUTPUT):
                print(" Evaluation = %d" % eval(p_inst))
            if (eval(p_inst) == int(e[2])):
                correct += 1
def test_ssa_complex_1(self):
    """Run ``convert_to_ssa`` on the complex1 fixture; passes if nothing raises."""
    graph = CFG(complex1_cfg_blocks)
    graph.convert_to_ssa()
import argparse import torch import torch.utils.data as data from loader_voxforge import * from cfg import CFG import csv config = CFG() """ data_path_predict = "data/presentation" vx = VOXFORGE(data_path_predict, langs=config.vx.langs, ratios=[0., 0., 1.], label_type="lang", use_cache=False, use_precompute=False) vx.transform = config.vx.transform vx.target_transform = config.vx.target_transform vx = config.vx vx.set_split("train") config.vx = vx dl = data.DataLoader(vx, batch_size=1, shuffle=False) """ config.vx.set_split("test") RLENC = {v: k for (k, v) in config.vx.target_transform.vocab.items()} model = config.model model.eval() correct = 0 for i, (mb, tgt) in enumerate(config.dl):
def test_ssa_simple(self):
    """Run ``convert_to_ssa`` on the simple fixture; passes if nothing raises."""
    graph = CFG(simple_cfg_blocks)
    graph.convert_to_ssa()
import SATA_PRETREAT import os import shutil import scipy import argparse import matplotlib.pyplot as plt import datetime import GETPERDATA from pyfaidx import Fasta from cfg import CFG parser = argparse.ArgumentParser() parser.add_argument('-f', help="the folder input and ouput.") args = parser.parse_args() cfg = CFG() # The below items should be check, modify if nessary. OUTPUT = cfg.output CLINICAL_ITEM = cfg.clinical_item MIN_ALIVE = cfg.min_alive VOTE_CLASS = 'Class_4' VOTE_LINE = 1 CALC_NUM = 10 SATA_LIST = [ # 'ICD_O3_pathology', # 'ICD_O3_site', # 'ajcc_stage', 'patient_age', # 'patient_gender', # 'patient_race', 'patient_weight'
def buildCFG(cfile, function=None):
    """Create the CFG for one function or for every function of a translation unit.

    Parameters
    ----------
    cfile :
        Source file passed to ``buildAST`` to obtain the translation-unit
        cursor.
    function : str, optional
        Name of the function whose CFG is wanted. When omitted (or falsy),
        every function definition of the translation unit is processed.

    Return
    ------
    `obj`:CFG or None
        When ``function`` is given: the result of ``CFG.buildCFG`` for that
        function, or None when no top-level cursor with that name ends in a
        compound statement (no such function, or only a prototype).
    list
        When ``function`` is not given: one ``[name, cfg]`` pair per function
        declaration whose last child is a compound statement.
    """
    # First we need to build the AST
    tu_cursor = buildAST(cfile)

    if function:
        # Look for a cursor with the requested name that has a body. The
        # last match wins, as before. Requiring a trailing compound
        # statement avoids an IndexError on a childless cursor and stops a
        # prototype's last parameter from being treated as the body.
        func = None
        c_stmt = None
        for e in tu_cursor.get_children():
            if str(e.spelling) != str(function):
                continue
            children = list(e.get_children())
            if children and children[-1].kind is CursorKind.COMPOUND_STMT:
                func = e
                c_stmt = children[-1]

        # The function is not defined in the translation unit
        if func is None:
            return None

        # Wrap the function cursor and its compound stmt in CFG decorators.
        # NOTE(review): this branch uses Decl/CFG while the whole-unit branch
        # uses FunctionDecl/CFGBuilder; kept as found -- confirm which is
        # intended.
        f_decorator = Decl(func)
        c_stmt_decorator = CompoundStmt(c_stmt)
        cfg = CFG()
        return cfg.buildCFG(f_decorator, c_stmt_decorator)

    # No function requested: walk every child of the translation unit.
    cfgs = []
    builder = CFGBuilder()
    for child in tu_cursor.get_children():
        # Only function declarations are of interest
        if child.kind is not CursorKind.FUNCTION_DECL:
            continue
        childs = list(child.get_children())
        # Keep only definitions: the last child must be a compound statement
        if not childs or childs[-1].kind is not CursorKind.COMPOUND_STMT:
            continue
        # FIXIT: accessing to a 'static' variable,
        # find another way to access to the CursorKind
        f_decorator = FunctionDecl(child)
        c_stmt_decorator = CompoundStmt(childs[-1])
        cfg_b = builder.buildCFG(f_decorator, c_stmt_decorator)
        # Save the cfg together with the function name
        cfgs.append([child.spelling, cfg_b])
    return cfgs
class CFGBuilder1(CFGBuilder):
    '''
    CFG builder that reads instruction batches from a samples file and
    records basic blocks in ``bbr`` and edges in ``cfg``.

    NOTE(review): the nesting of the method bodies below was reconstructed
    from a whitespace-flattened source -- confirm against the original file.

    :type samplesFile: string
    :type batchSize: int
    :type binSize: int
    :type stdDevThreshold: float
    :type windowSize: int
    :type recurrentThreshold: int
    :type targets: dict[int, int]
    :type bbr: BBRepository
    :type cfg: CFG
    '''

    def __init__(self, samplesFile, batchSize, binSize, stdDevThreshold, windowSize, recurrentThreshold):
        # Store the parameters and create the helper objects and counters.
        CFGBuilder.__init__(self)
        self.stat = Statistics(binSize, stdDevThreshold)
        self.ifb = InputFileBuffer(50000, samplesFile)
        self.batchSize = batchSize
        self.binSize = binSize
        self.stdDevThreshold = stdDevThreshold
        self.samplesFile = samplesFile
        self.windowSize = windowSize
        self.recurrentThreshold = recurrentThreshold
        self.targets = dict()
        self.bbr = BBRepository()
        self.cfg = CFG()
        self.numMerge = 0
        self.highStdevEdges = 0
        self.numHighStdevTries = 0
        self.numHighStdevOK = 0

    def buildCFGR(self, instrGen, justBuild, ib):
        # Consume instructions from instrGen, adding basic blocks to self.bbr
        # and edges to self.cfg. Calls itself on the same generator after a
        # branch or call. With justBuild == 0 it returns at the first
        # instruction whose address has no bin; with justBuild == 1 blocks
        # are built whether or not the bin count exceeds recurrentThreshold.
        for i in instrGen:
            b = self.stat.getBinFromAddr(i.pc)
            if b is None and (justBuild == 0):
                return
            if b is not None:
                recurrent = b.count > self.recurrentThreshold
            else:
                recurrent = False
            if recurrent or justBuild==1:
                # logger.debug("\t is target...")
                bb = self.bbr.getBB(i.pc)
                if not bb:
                    bb = BB(i.pc)
                    self.bbr.addBB(bb)
                while not bb.done:
                    x = self.bbr.getBB(i.pc)
                    if x:
                        # Reached an address owned by a different block:
                        # close this block and link it to that one.
                        if x.entryAddress != bb.entryAddress:
                            bb.done = 1
                            self.numMerge+=1
                            logger.debug("merging blocks %x and %x", bb.entryAddress, x.entryAddress)
                            bb.addTarget(x)
                            x.addSource(bb)
                            self.cfg.addOrIncrementEdge(bb, x)
                            break
                    if not bb.hasInstruction(i.pc):
                        bb.addInstruction(i)
                    if i.isBranchOrCall():
                        # A branch or call ends the block.
                        bb.done = 1
                        iafter = ib.getInstructionAfter(i)
                        if iafter is None:
                            break
                        #logger.debug("i: %s", i)
                        #logger.debug("iafter: %s", iafter)
                        if iafter.pc == i.target: #branch taken
                            #logger.debug("0x%x: branch taken to 0x%x (%s)\n", i.pc, iafter.pc, i.text)
                            # justBuild = 1 if b.count > self.recurrentThreshold else 0
                            #
                            # self.buildCFGR(instrGen, justBuild, ib)
                            self.buildCFGR(instrGen, 0, ib)
                            targetBB = self.bbr.getBB(i.target)
                        else: #branch not taken
                            #logger.debug("0x%x: fallthrough to 0x%x (%s)\n", i.pc, iafter.pc, i.text)
                            self.buildCFGR(instrGen, 0, ib)
                            targetBB = self.bbr.getBB(iafter.pc)
                        if targetBB:
                            bb.addTarget(targetBB)
                            targetBB.addSource(bb)
                            self.cfg.addOrIncrementEdge(bb, targetBB)
                    # Advance by hand inside the while loop (Python 2
                    # generator API).
                    try:
                        i = instrGen.next()
                    except StopIteration:
                        break
            # Link the block holding a branch/call to the block of the
            # instruction after it, when both blocks exist.
            if i.isBranchOrCall():
                iafter = ib.getInstructionAfter(i)
                if iafter is not None:
                    targetBB = self.bbr.getBB(iafter.pc)
                    thisBB = self.bbr.getBB(i.pc)
                    if targetBB and thisBB:
                        thisBB.addTarget(targetBB)
                        targetBB.addSource(thisBB)
                        self.cfg.addOrIncrementEdge(thisBB, targetBB)

    def buildCFG(self):
        # Read the samples file batch by batch, build the CFG, write it to
        # "test_builder1.dot", print summary figures and write each batch's
        # meanWindowStdev to "<samplesFile>.stdev".
        # ignore the first line in the samples file
        self.ifb.getLine()
        moreBatches = not self.ifb.eof
        commonBinIns = dict()
        totalIns = dict()
        lowstdev = 0
        highstdev = 0
        stddevs = []
        while (moreBatches):
            ib = InstructionBatch(self.batchSize, self.ifb)
            moreBatches = ib.fromFile()
            ib.calcStatistics(self.windowSize, 1)
            self.stat.registerLowStDevStatistics(ib)
            stddevs.append(ib.meanWindowStdev)
            if ib.batchId % 100 == 0:
                logger.debug("batch %d", ib.batchId)
            if ib.meanWindowStdev <= self.stdDevThreshold:
                # Low deviation batch: build blocks from all its instructions.
                instrGen = ib.genInstruction()
                self.buildCFGR(instrGen, 0, ib)
                lowstdev+=1
            else:
                # High deviation batch: only add edges leaving branches/calls
                # that sit in an existing block whose bin count exceeds
                # recurrentThreshold.
                highstdev+=1
                printedIns = False
                instrGen = ib.genInstruction()
                for i in instrGen:
                    if i.isBranchOrCall():
                        b = self.stat.getBinFromAddr(i.pc)
                        if b is None:
                            continue
                        bb = self.bbr.getBB(i.pc)
                        if bb and b.count > self.recurrentThreshold:
                            iafter = ib.getInstructionAfter(i)
                            if iafter is not None:
                                otherBB = self.bbr.getBB(iafter.pc)
                                if not otherBB:
                                    # No block at the following address yet:
                                    # try to build one (justBuild=1).
                                    logger.debug("trying to create a new BB for %x", iafter.pc)
                                    self.buildCFGR(instrGen, 1, ib)
                                    self.numHighStdevTries+=1
                                    otherBB = self.bbr.getBB(iafter.pc)
                                    if otherBB:
                                        logger.debug("got it!")
                                        self.numHighStdevOK+=1
                                if otherBB:
                                    bb.addTarget(otherBB)
                                    otherBB.addSource(bb)
                                    self.cfg.addOrIncrementEdge(bb, otherBB)
                                    self.highStdevEdges+=1
        self.cfg.toDot("test_builder1.dot", True, False)
        self.cfg.printCFG()
        print len(self.bbr.blocks), " basic blocks were recognized"
        print len(self.stat.bins), " address bins were created"
        print lowstdev, " low standard deviation batches"
        print highstdev, " high standard deviation batches"
        totalBBIns = []
        for i in self.bbr.blocks:
            totalBBIns.append(len(self.bbr.blocks[i].instructions))
        print "each block has an average of ", np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions"
        print "number of basic block merges: ", self.numMerge
        print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges
        print "number of high standard deviation basic block build tries: ", self.numHighStdevTries
        print "number of high standard deviation basic block actually built: ", self.numHighStdevOK
        # TODO: implement metrics: number of instructions and basic blocks per function
        stdev = file(self.samplesFile + ".stdev", 'w')
        for i in stddevs:
            stdev.write(str(i) + "\n")
        stdev.close()
def test_follow(self):
    """Check ``follow`` against expected sets for five grammars."""
    grammar = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'A B C'},
            'A': {'a A', '&'},
            'B': {'b B', 'A C d'},
            'C': {'c C', '&'},
        },
    )
    for symbol, expected in (
        ('S', {'$'}),
        ('A', {'a', 'b', 'c', 'd'}),
        ('B', {'c', '$'}),
        ('C', {'d', '$'}),
    ):
        self.assertSetEqual(expected, grammar.follow(symbol))

    # Expression grammar.
    grammar = CFG.create(
        initial_symbol='E',
        productions={
            'E': {"T E'"},
            "E'": {"+ T E'", '&'},
            'T': {"F T'"},
            "T'": {"* F T'", '&'},
            'F': {'( E )', 'id'}
        },
    )
    for symbol, expected in (
        ('E', {')', '$'}),
        ("E'", {')', '$'}),
        ('T', {'+', ')', '$'}),
        ("T'", {'+', ')', '$'}),
        ("F", {'*', '+', ')', '$'}),
    ):
        self.assertSetEqual(expected, grammar.follow(symbol))

    # S appears between terminals in its own productions.
    grammar = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'a S a', 'b S b', 'a', 'b'},
        },
    )
    self.assertEqual({'a', 'b', '$'}, grammar.follow('S'))

    # A and B only derive '&'.
    grammar = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'A a A b', 'B b B a'},
            'A': {'&'},
            'B': {'&'},
        },
    )
    for symbol in ('A', 'B'):
        self.assertEqual({'a', 'b'}, grammar.follow(symbol))

    grammar = CFG.create(
        initial_symbol='S',
        productions={
            'S': {'a B S', 'b A S', '&'},
            'A': {'a', 'b A A'},
            'B': {'b', 'a B B'},
        },
    )
    for symbol, expected in (
        ('S', {'$'}),
        ('A', {'$', 'a', 'b'}),
        ('B', {'$', 'a', 'b'}),
    ):
        self.assertSetEqual(expected, grammar.follow(symbol))
def optimise(self):
    """
    Optimise the IR stored in ``self.flat`` and store the result back there.

    Steps, in order:
        1. optimise the control flow graph of the whole program
        2. optimise jumps and branches on the flat form
        3. split the flat form in frames and build one graph per frame
        4. per block, repeat constant folding, copy propagation and dead
           code removal until a pass changes nothing
        5. convert the graphs back to frames and concatenate them

    Counts of applied optimisations are added to ``self.stats``.
    """
    log = self.logger

    log.info('optimising global control flow graph')
    whole_program = CFG(self.flat)
    if self.verbosity > 2:
        whole_program.cfg_to_diagram("allinstr_graph_before.png")
    optimise_tree.optimise(whole_program)
    if self.verbosity > 2:
        whole_program.cfg_to_diagram("allinstr_graph_after.png")
    self.flat = whole_program.cfg_to_flat()

    log.info('optimising flat (jumps and branches)')
    self.flat = flat_opt.optimise(self.flat)

    log.info('splitting flat in frames')
    frames = split_frames(self.flat)

    log.info('creating graph for each frame')
    graphs = [CFG(frame) for frame in frames]

    log.info('optimising blocks')
    for graph_number, graph in enumerate(graphs, 1):
        log.info('graph %d of %d' % (graph_number, len(graphs)))
        Dataflow(graph)
        # The Liveness object is created but not used below; the liveness
        # optimisation loop is disabled in this version.
        liveness = Liveness(graph, self.verbosity)

        for block_number, block in enumerate(graph.blocks, 1):
            log.debug('block %d of %d' % (block_number, len(graph.blocks)))
            # (optimiser, stats key), run in this order on every pass.
            optimisers = (
                (b_opt.ConstantFold(block), 'cf'),
                (b_opt.CopyPropagation(block), 'cp'),
                (b_opt.DeadCode(block), 'dc'),
            )
            pass_number = 0
            stable = False
            while not stable:
                stable = True
                pass_number += 1
                log.debug('pass '+str(pass_number))
                for optimiser, key in optimisers:
                    changed = optimiser.optimise()
                    if changed:
                        self.stats[key] += optimiser.stats[key]
                        # Something changed: run another pass.
                        stable = False

    log.info('basic-block peephole optimisations done:')
    log.info('\t\tconstant folds: %d' % (self.stats['cf']))
    log.info('\t\tcopy propagations: %d' % (self.stats['cp']))
    log.info('\t\tdead code removes: %d' % (self.stats['dc']))

    log.info('joining graphs to frames')
    frames = [graph.cfg_to_flat() for graph in graphs]

    log.info('joining frames to flat')
    self.flat = sum(frames, [])
def optimise(self):
    """
    Optimise the IR.

    Procedure, repeated until self.flat no longer changes between two
    passes or 10000 passes have run:
        1. split in frames
        2. convert frames to graphs
        3. optimise graphs
        4. convert graphs to (flat) frames
        5. concatenate frames to get optimised program.
    Store result in flat.

    Each optimisation runs only when its letter ('a' to 'f') is in
    self.enabled_optimisations. Counts are added to self.stats.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-flattened source; in particular confirm whether the
    parse_instr.parse call belongs inside the 'b' branch.
    """
    # top loop
    flat_orig = None
    top_loop_counter = 0
    while True:
        # Stop when the previous pass left the program unchanged.
        if flat_orig == self.flat:
            self.logger.info('optimisation is stable')
            break
        # Hard limit on the number of passes.
        if top_loop_counter == 10000:
            self.logger.warning('top loop limit reached (10000 iterations)')
            break
        # Keep a copy to compare against at the start of the next pass.
        flat_orig = self.flat[:]
        top_loop_counter += 1
        self.logger.info('top pass %s' % str(top_loop_counter))
        # a. optimisation of the whole-program control flow graph
        if 'a' in self.enabled_optimisations:
            self.logger.info('optimising global control flow graph')
            cfg = CFG(self.flat)
            #if self.verbosity > 2:
            #    cfg.cfg_to_diagram("allinstr_graph_before.png")
            optimise_tree.optimise(cfg)
            #if self.verbosity > 2:
            #    cfg.cfg_to_diagram("allinstr_graph_after.png")
            self.flat = cfg.cfg_to_flat()
        # b. jump optimisations
        if 'b' in self.enabled_optimisations:
            self.logger.info('optimising flat (jumps and branches)')
            self.flat = flat_opt.optimise(self.flat)
        self.flat = parse_instr.parse(self.flat)
        self.logger.info('splitting flat in frames')
        frames = split_frames(self.flat)
        self.logger.info('creating graph for each frame')
        graphs = [CFG(frame) for frame in frames]
        self.logger.info('optimising blocks')
        for graphnr, graph in enumerate(graphs):
            self.logger.info('graph %d of %d' % (graphnr + 1, len(graphs)))
            #Dataflow(graph)
            # f. liveness optimisation, repeated until it reports no change
            if 'f' in self.enabled_optimisations:
                l = Liveness(graph,self.verbosity)
                self.logger.info('Performing liveness optimalisation on graph')
                change = True
                while change:
                    l.analyse()
                    change = l.optimise()
            for blocknr, block in enumerate(graph.blocks):
                self.logger.debug('block %d of %d' % (blocknr + 1, len(graph.blocks)))
                cf_opt = b_opt.ConstantFold(block)
                cp_opt = b_opt.CopyPropagation(block)
                dc_opt = b_opt.DeadCode(block)
                done = False
                subopt_changes = False
                i = 0
                # Repeat the enabled block optimisations until one full pass
                # changes nothing.
                while (not done):
                    done = True
                    i += 1
                    self.logger.debug('\t pass '+str(i))
                    # c. constant folding
                    if 'c' in self.enabled_optimisations:
                        subopt_changes = cf_opt.optimise()
                        if subopt_changes: self.stats['cf'] += cf_opt.stats['cf']
                        done = done & (not subopt_changes)
                    # d. copy propagation
                    if 'd' in self.enabled_optimisations:
                        subopt_changes = cp_opt.optimise()
                        if subopt_changes:self.stats['cp'] += cp_opt.stats['cp']
                        done = done & (not subopt_changes)
                    # e. dead code removal
                    if 'e' in self.enabled_optimisations:
                        subopt_changes = dc_opt.optimise()
                        if subopt_changes:self.stats['dc'] += dc_opt.stats['dc']
                        done = done & (not subopt_changes)
        self.logger.info('basic-block peephole optimisations done:')
        self.logger.info('\t constant folds: %d' % (self.stats['cf']))
        self.logger.info('\t copy propagations: %d' % (self.stats['cp']))
        self.logger.info('\t dead code removes: %d' % (self.stats['dc']))
        self.logger.info('joining graphs to frames')
        frames = [graph.cfg_to_flat() for graph in graphs]
        self.logger.info('joining frames to flat')
        # Concatenate the per-frame instruction lists into one flat list.
        self.flat = sum(frames, [])
from cfg import T, NT, CFG

# Demo script: build a grammar over {a, b}, convert it to Chomsky normal
# form, test two words, then print 20 strings built from the five parts
# returned by get_pumping_lemma().

# Nonterminals (created in the order Z, P, M) and terminals.
Z, P, M = NT(), NT(), NT()
a, b = T('a'), T('b')

# Start symbol Z with six productions, followed by kleene().
g = CFG(
    Z,
    [
        (Z, (a, P)),
        (Z, (b, M)),
        (P, (b,)),
        (P, (a, P, P)),
        (M, (a,)),
        (M, (b, M, M)),
    ],
).kleene()

g_ = g.chomsky_normal_form()

print(g_.accepts('baabbbaaabba'))
print(g_.accepts('baabbaaabba'))

pl = g.get_pumping_lemma()
print(pl)

# Print u v^i w x^i y for i = 0..19.
u, v, w, x, y = pl
for i in range(20):
    pumped = "%s%s%s%s%s" % (u, v*i, w, x*i, y)
    print(pumped)