Esempio n. 1
0
    def test_first_nt(self):
        """Compare first_nonterminal() with hand-written sets for two grammars."""
        scenarios = (
            (
                {
                    'S': {'A b', 'A B c'},
                    'B': {'b B', 'A d', '&'},
                    'A': {'a A', '&'},
                },
                (
                    ('S', {'A', 'B'}),
                    ('A', {'&'}),
                    ('B', {'A', '&'}),
                ),
            ),
            (
                {
                    'S': {'A B C'},
                    'A': {'a A', '&'},
                    'B': {'b B', 'A C d'},
                    'C': {'c C', '&'},
                },
                (
                    ('S', {'A', 'B', 'C'}),
                    ('A', {'&'}),
                    ('B', {'A', 'C'}),
                    ('C', {'&'}),
                ),
            ),
        )

        # Each grammar is built only after the previous one has been checked.
        for productions, expectations in scenarios:
            grammar = CFG.create(initial_symbol='S', productions=productions)
            for symbol, expected in expectations:
                self.assertSetEqual(expected, grammar.first_nonterminal(symbol))
Esempio n. 2
0
    def test_without_infertile(self):
        """Check without_infertile() on two grammars.

        The first result is expected to keep only ``S -> a``; the second
        grammar is expected to come back with its productions unchanged.
        """
        pruned = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'a A', 'a'},
                'A': {'a A'},
            },
        ).without_infertile()
        self.assertEqual('S', pruned.initial_symbol)
        self.assertDictEqual({'S': {'a'}}, pruned.productions)

        unchanged = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'A a A b'},
                'A': {'c', '&'},
            },
        ).without_infertile()
        expected_productions = {
            'S': {'A a A b'},
            'A': {'c', '&'},
        }
        self.assertEqual(expected_productions, unchanged.productions)
Esempio n. 3
0
    def test_load(self):
        """Load a grammar from text and check that malformed input raises ValueError."""
        grammar = CFG.load(io.StringIO("""
            E -> T E'
            E' -> + T E' | &
            T -> F T'
            T' -> * F T' | &
            F -> ( E ) | id
        """))

        expected_productions = {
            'E': {"T E'"},
            "E'": {"+ T E'", '&'},
            'T': {"F T'"},
            "T'": {"* F T'", '&'},
            'F': {'( E )', 'id'}
        }
        self.assertEqual('E', grammar.initial_symbol)
        self.assertDictEqual(expected_productions, grammar.productions)

        # Empty input is rejected.
        empty_source = io.StringIO('')
        with self.assertRaises(ValueError):
            CFG.load(empty_source)

        # A rule with nothing after the arrow is rejected as well.
        dangling_rule = io.StringIO("""
            S ->
        """)
        with self.assertRaises(ValueError):
            CFG.load(dangling_rule)
Esempio n. 4
0
    def test_epsilon_free(self):
        """Check epsilon_free() on a grammar with a nullable symbol and on ``S -> &``."""
        result = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'A a A b'},
                'A': {'c', '&'},
            },
        ).epsilon_free()

        self.assertEqual('S', result.initial_symbol)
        expected = {
            'S': {'a A b', 'A a b', 'a b', 'A a A b'},
            'A': {'c'},
        }
        self.assertDictEqual(expected, result.productions)

        # For a grammar whose only production is '&', the expected result has
        # the initial symbol "S'" and a single entry for it.
        result = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'&'},
            },
        ).epsilon_free()

        self.assertEqual("S'", result.initial_symbol)
        expected = {
            "S'": {'S', '&'},
        }
        self.assertDictEqual(expected, result.productions)
Esempio n. 5
0
 def testOut(self):
     """Run the pass on a call to __out__ and check the resulting CFG path."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')

     # Build the two typed numeral arguments in the order 5, 1.
     call_args = []
     for value in (5, 1):
         numeral = Numeral(value)
         numeral.type = int_type
         call_args.append(numeral)

     stmt = Operation(FunctionCall(Name('__out__'), call_args))
     cfg.connect(cfg.entry, stmt, cfg.exit)
     function.cfg = cfg
     function.symbol_table = SymbolTable()
     cfg.symbol_table = SymbolTable(function.symbol_table)
     self.assertSuccess(Program([function]))

     # Expected path: each argument is first assigned to a temporary and the
     # call then receives the temporaries.
     expected_path = [
         function.cfg.entry,
         Operation(BinaryOperation([Name('$t0'), '=', Numeral(5)])),
         Operation(BinaryOperation([Name('$t1'), '=', Numeral(1)])),
         Operation(FunctionCall(Name('__out__'), [Name('$t0'), Name('$t1')])),
         function.cfg.exit,
     ]
     self.assertTrue(function.cfg.has_path(*expected_path))
Esempio n. 6
0
 def cfg_filter(self, arg=None):
     '''
     CFG filtering: pick an origin/patch function pair and, when their CFGs
     differ, record the pair in self.candidate_func.
     '''
     if arg is None:
         # Default: take the next pair of candidate functions
         pair = self.func_generator.next()
     elif isinstance(arg, (int, str)):
         # Look the pair up by address or by function name
         pair = self.binfile.get_func_graphs(arg)
     elif isinstance(arg, list) and isinstance(arg[0], FuncGraph):
         # The candidate functions were passed in directly
         pair = arg[0], arg[1]
     else:
         # Unrecognised argument: keep whatever self.cur_func already holds
         pair = None

     if pair is not None:
         self.cur_func['origin'], self.cur_func['patch'] = pair

     self.cfg['origin'] = CFG(self.cur_func['origin'])
     self.cfg['patch'] = CFG(self.cur_func['patch'])
     origin_cfg = self.cfg['origin']
     patch_cfg = self.cfg['patch']

     funcname = origin_cfg.funcname
     self.func_cfg_centroid[funcname] = {
         'origin': origin_cfg.get_centroid(),
         'patch': patch_cfg.get_centroid()
     }

     if origin_cfg.same_with(patch_cfg):
         return
     self.candidate_func[funcname] = {
         'origin': origin_cfg.address,
         'patch': patch_cfg.address
     }
Esempio n. 7
0
    def test_ssa_complex_2(self):
        """Convert the complex2 CFG to SSA and print its blocks in reverse order."""
        graph = CFG(complex2_cfg_blocks)
        graph.convert_to_ssa()

        # No assertions: the output is for manual inspection.
        reversed_blocks = graph.blocks[::-1]
        for block in reversed_blocks:
            # Name, then the block itself, then an empty line.
            print(block.name, block, '', sep='\n')
Esempio n. 8
0
 def visit_FunctionDecl(self, func):
     """Create a CFG for ``func``, attach it as ``func.cfg`` and fill it from the body."""
     graph = CFG(func.name)
     func.cfg = graph

     if not hasattr(func, 'symbol_table'):
         graph.symbol_table = SymbolTable()
     else:
         # New table sharing the function table's parent, with the function
         # table embedded into it.
         graph.symbol_table = SymbolTable(func.symbol_table.parent)
         graph.symbol_table.embed(func.symbol_table)

     last_node = self.visit(
         func.body, cfg=graph, entry=graph.entry, exit=graph.exit)
     if last_node is None:
         return
     # The body left a trailing node: link it to the exit.
     graph.connect(last_node, graph.exit)
Esempio n. 9
0
    def test_insert_phis_complex_1(self):
        """insert_phis() on the complex1 CFG: one phi expected, in block 4."""
        graph = CFG(complex1_cfg_blocks)
        inserted = graph.insert_phis()
        self.assertEqual(inserted, 1)

        # Blocks 1-3 must not have received any phi.
        phi_free_blocks = (c1_block1, c1_block2, c1_block3)
        for block in phi_free_blocks:
            for instruction in block.insns:
                self.assertEqual(instruction.num_phis(), 0)

        # The first instruction of block 4 carries the phi, writing r3.
        first_insn = c1_block4.insns[0]
        self.assertEqual(first_insn.num_phis(), 1)
        self.assertEqual(first_insn.pcode[0].output, r3)
Esempio n. 10
0
    def test_insert_phis_complex_2(self):
        """insert_phis() on the complex2 CFG: two phis expected, both in block 4."""
        graph = CFG(complex2_cfg_blocks)
        inserted = graph.insert_phis()
        self.assertEqual(inserted, 2)

        # Every block other than block 4 must stay free of phis.
        phi_free_blocks = (c2_block1, c2_block2, c2_block3, c2_block5)
        for block in phi_free_blocks:
            for instruction in block.insns:
                self.assertEqual(instruction.num_phis(), 0)

        first_insn = c2_block4.insns[0]
        self.assertEqual(first_insn.num_phis(), 2)

        # The phi operations must write exactly r3 and r1 (order irrelevant).
        phi_outputs = {op.output for op in first_insn.pcode if op.is_phi()}
        self.assertEqual(phi_outputs, {r3, r1})
Esempio n. 11
0
    def update_grammar(self):
        '''Rebuild the grammar from the editor text and refresh the UI.

           On a load failure the traceback is printed, a status message is
           shown and the method returns without touching the widgets.'''
        try:
            source_lines = self.editor.toPlainText().splitlines()
            self.grammar = CFG.load(source_lines)
        except Exception as error:
            traceback.print_tb(error.__traceback__)
            self.window.statusBar().showMessage(
                'Failed to generate grammar. Check your syntax.')
            # The menu items are deliberately left in their current state.
            return

        self.window.statusBar().showMessage('Done.')
        widget_states = (
            (self.make_proper_item, True),
            (self.parse_table_item, True),
            (self.run_grammar_btn, False),
        )
        for widget, enabled in widget_states:
            widget.setEnabled(enabled)

        try:
            self.update_tables()
        except RecursionError:
            # Only recursion overflow is reported here; the menu items stay
            # enabled.
            self.window.statusBar().showMessage(
                'Failed to generate grammar tables: Recursion depth overflow.')
Esempio n. 12
0
    def execute_test(self, file, filepath, capsys):
        """Parse ``filepath`` and compare AST, CFG and symbol table with golden files.

        Args:
            file: Base name of the test case; the expected outputs are read
                from ``TestSymtab.tests_out`` as ``file + ".ast"``,
                ``file + ".cfg"`` and ``file + ".sym"``.
            filepath: Path of the source file fed to the parser.
            capsys: Capture object whose ``readouterr().err`` text is
                prepended to every produced output before comparing.

        Raises:
            AssertionError: If any produced output differs from its golden
                file (comparison ignores leading/trailing whitespace).
        """
        def read_expected(extension):
            # Read one golden file and close it immediately; the original
            # code left every handle open until garbage collection.
            path = os.path.join(TestSymtab.tests_out, file + extension)
            with open(path) as handle:
                return handle.read().strip()

        lexer = APLLexer()
        lexer.build()

        parser = APLYacc(output=YaccOutput.AST)
        parser.build(lexer)

        with open(filepath) as source:
            ast = parser.parse(source.read())

        # A falsy AST (parse failure) yields empty outputs, so only the
        # captured stderr is compared against the golden files.
        ast_str = str(ast) if ast else ''
        expected_ast = read_expected(".ast")
        # Captured once, right after parsing, and reused for all comparisons.
        errout = capsys.readouterr().err
        assert expected_ast == (errout + ast_str).strip()

        cfg_str = str(CFG(ast)) if ast else ''
        assert read_expected(".cfg") == (errout + cfg_str).strip()

        symtab_str = symtab_from_ast(parser, ast) if ast else ''
        assert read_expected(".sym") == (errout + symtab_str).strip()
Esempio n. 13
0
    def test_parse_table(self):
        """Compare parse_table() of the expression grammar with a hand-written table."""
        productions = {
            'E': {"T E'"},
            "E'": {"+ T E'", '&'},
            'T': {"F T'"},
            "T'": {"* F T'", '&'},
            'F': {'( E )', 'id'}
        }
        grammar = CFG.create(initial_symbol='E', productions=productions)

        # Keys are (non-terminal, lookahead) pairs; values are the production
        # expected for that pair.
        expected_table = {
            ('E', 'id'): "T E'",
            ('E', '('): "T E'",
            ("E'", ')'): '&',
            ("E'", '+'): "+ T E'",
            ("E'", '$'): '&',
            ('T', 'id'): "F T'",
            ('T', '('): "F T'",
            ("T'", ')'): '&',
            ("T'", '+'): '&',
            ("T'", '*'): "* F T'",
            ("T'", '$'): '&',
            ('F', 'id'): 'id',
            ('F', '('): '( E )',
        }
        self.assertDictEqual(expected_table, grammar.parse_table())
Esempio n. 14
0
    def test_parse(self):
        """Step through parse('id + id') and check every yielded (input, stack) pair."""
        grammar = CFG.create(
            initial_symbol='E',
            productions={
                'E': {"T E'"},
                "E'": {"+ T E'", '&'},
                'T': {"F T'"},
                "T'": {"* F T'", '&'},
                'F': {'( E )', 'id'}
            },
        )

        steps = grammar.parse('id + id')

        # Each entry is (remaining input, stack) in the order it is yielded.
        expected_steps = [
            (['id', '+', 'id'], ['E']),
            (['id', '+', 'id'], ["E'", 'T']),
            (['id', '+', 'id'], ["E'", "T'", 'F']),
            (['id', '+', 'id'], ["E'", "T'", 'id']),
            (['+', 'id'], ["E'", "T'"]),
            (['+', 'id'], ["E'"]),
            (['+', 'id'], ["E'", 'T', '+']),
            (['id'], ["E'", 'T']),
            (['id'], ["E'", "T'", 'F']),
            (['id'], ["E'", "T'", 'id']),
            ([], ["E'", "T'"]),
            ([], ["E'"]),
            ([], []),
        ]
        for expected in expected_steps:
            self.assertTupleEqual(expected, next(steps))

        # Nothing may be yielded after the final step.
        with self.assertRaises(StopIteration):
            next(steps)
Esempio n. 15
0
def intersect(cfg, nfa):
    """Build a grammar for the intersection of ``cfg`` with the language of ``nfa``.

    Triple construction: every non-terminal of the result stands for a triple
    ``(p, A, q)`` of an NFA state ``p``, a non-terminal ``A`` of ``cfg`` and an
    NFA state ``q``. For each production of ``cfg`` all state sequences that
    are consistent with the NFA transitions on the production's terminals are
    enumerated, and one production is emitted per sequence.

    Args:
        cfg: Source grammar; ``nonterms``, ``start`` and ``productions``
            (mapping a left-hand side to a collection of right-hand sides)
            are read.
        nfa: Automaton; ``states``, ``start``, ``final`` and ``transitions``
            (``state -> symbol -> iterable of states``) are read. ``final``
            is used as a single state. Transitions labelled ``WILDCARD``
            match any terminal.

    Returns:
        A new ``CFG`` whose start symbol is the triple
        ``(nfa.start, cfg.start, nfa.final)``.
    """
    ret = CFG()
    Nt = cfg.nonterms
    St = list(nfa.states)
    nStates = len(St)
    # NOTE(review): the "- 1" presumably accounts for one non-terminal that a
    # fresh CFG() already owns - confirm against the CFG constructor.
    for _ in range(len(Nt) * nStates ** 2 - 1):
        ret.addNonterm()
    NtInv = {A: i for i, A in enumerate(Nt)}
    StInv = {s: i for i, s in enumerate(St)}

    def getNterm(p, A, q):
        # Row-major index of the triple (A, p, q) into ret.nonterms.
        idx = (NtInv[A] * nStates + StInv[p]) * nStates + StInv[q]
        return ret.nonterms[idx]

    ret.start = getNterm(nfa.start, cfg.start, nfa.final)
    for lhs, rhsSet in cfg.productions.items():
        for rhs in rhsSet:
            # An item (x, R, y) means: starting in state x, the prefix read so
            # far was rewritten to R and the automaton is now in state y.
            items = [(x, [], x) for x in St]
            for term in rhs:
                newItems = []
                if term in NtInv:
                    # A non-terminal may take the automaton from y to any z.
                    for z in St:
                        newItems.extend(
                            (x, R + [getNterm(y, term, z)], z)
                            for (x, R, y) in items)
                else:
                    for (x, R, y) in items:
                        # Follow transitions out of the CURRENT state y. The
                        # previous code looked them up from the start state x,
                        # which is only correct for the first symbol of rhs.
                        edges = nfa.transitions.get(y, {})
                        for label in (term, WILDCARD):
                            newItems.extend(
                                (x, R + [term], z)
                                for z in edges.get(label, []))
                items = newItems
            for (p, R, q) in items:
                ret.addProduction(getNterm(p, lhs, q), R)
    return ret
Esempio n. 16
0
    def test_is_ll1(self):
        """is_ll1() must be false for the grammar S -> a S a | b S b | a | b."""
        productions = {
            'S': {'a S a', 'b S b', 'a', 'b'}
        }
        grammar = CFG.create(initial_symbol='S', productions=productions)

        self.assertFalse(grammar.is_ll1())
Esempio n. 17
0
    def test_nonterminals(self):
        """The nonterminals attribute must equal the set of left-hand sides used here."""
        productions = {
            'S': {'a A', '&'},
            'A': {'b S'},
        }
        grammar = CFG.create(initial_symbol='S', productions=productions)

        self.assertSetEqual({'S', 'A'}, grammar.nonterminals)
Esempio n. 18
0
 def testOut(self):
     """Check the CFG path produced for a call to __out__ with two numerals."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')

     first_arg = Numeral(5)
     first_arg.type = int_type
     second_arg = Numeral(1)
     second_arg.type = int_type
     call = FunctionCall(Name('__out__'), [first_arg, second_arg])
     stmt = Operation(call)

     cfg.connect(cfg.entry, stmt, cfg.exit)
     function.cfg = cfg
     function.symbol_table = SymbolTable()
     cfg.symbol_table = SymbolTable(function.symbol_table)
     program = Program([function])
     self.assertSuccess(program)

     # After the pass, each argument is expected to be assigned to a
     # temporary before the call, which then takes the temporaries.
     result_cfg = function.cfg
     found = result_cfg.has_path(
         result_cfg.entry,
         Operation(BinaryOperation([Name('$t0'), '=', Numeral(5)])),
         Operation(BinaryOperation([Name('$t1'), '=', Numeral(1)])),
         Operation(FunctionCall(Name('__out__'), [Name('$t0'), Name('$t1')])),
         result_cfg.exit,
     )
     self.assertTrue(found)
Esempio n. 19
0
 def testEmpty(self):
     """A CFG with only entry -> exit must linearise to the two public labels."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')
     cfg.connect(cfg.entry, cfg.exit)
     function.cfg = cfg
     program = Program([function])
     linearise = self.assertSuccess(program)
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         linearise.lines,
         [Label('f', public=True),
          Label('f$exit', public=True)])
Esempio n. 20
0
    def p_code(self, p):
        'code : VOID MAIN LPAREN RPAREN block'
        # NOTE: the string above is the grammar rule for this production
        # (presumably read by the parser generator); it must stay verbatim.
        #
        # Dumps every node of the block to the AST file, then writes the CFG
        # built from the block to the CFG file. Both files are closed here.
        body = p[5]
        logging.debug('body: %s' % (body))

        for ast_node in body:
            self.ast_file.write(str(ast_node))
        self.ast_file.close()

        cfg_text = str(CFG(body))
        self.cfg_file.write(cfg_text)
        self.cfg_file.close()
Esempio n. 21
0
 def testOneStatement(self):
     """A single operation between entry and exit linearises to one instruction."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')
     stmt = cfg.add(Operation(42))
     cfg.connect(cfg.entry, stmt)
     cfg.connect(stmt, cfg.exit)
     function.cfg = cfg
     program = Program([function])
     linearise = self.assertSuccess(program)
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(linearise.lines, [
         Label('f', public=True),
         Instruction(42),
         Label('f$exit', public=True)
     ])
Esempio n. 22
0
 def testInfiniteLoop(self):
     """A node connected to itself linearises to a label followed by a jump back to it."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')
     node = cfg.add(Operation(42))
     # entry -> node -> node: the exit node is never connected.
     cfg.connect(cfg.entry, node, node)
     function.cfg = cfg
     program = Program([function])
     linearise = self.assertSuccess(program)
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(linearise.lines, [
         Label('f', public=True),
         Label(2),
         Instruction(42),
         Jump(2),
         Label('f$exit', public=True)
     ])
Esempio n. 23
0
    def p_code(self, p):
        'code : global_statement_list'
        # NOTE: the string above is the grammar rule for this production
        # (presumably read by the parser generator); it must stay verbatim.
        #
        # Writes the AST nodes, the CFG text and the symbol information to
        # their output files; the AST and CFG files are closed here.
        statements = p[1]
        logging.debug('code: %s' % (statements))

        for ast_node in statements:
            self.ast_file.write(str(ast_node))
        self.ast_file.close()

        cfg_text = str(CFG(statements))
        self.cfg_file.write(cfg_text)
        self.cfg_file.close()

        # Procedures first, then variables, both taken from the table on top
        # of the table stack.
        print_procedures(self.tableptr.top(), self.sym_file)
        print_variables(self.tableptr.top(), self.sym_file)
Esempio n. 24
0
    def __init__(self, samplesFile, batchSize, binSize, stdDevThreshold, windowSize, recurrentThreshold):
        """Store the constructor arguments and create empty working state."""
        CFGBuilder.__init__(self)

        # Helpers built from the arguments; 50000 is the first argument given
        # to InputFileBuffer.
        self.stat = Statistics(binSize, stdDevThreshold)
        self.ifb = InputFileBuffer(50000, samplesFile)

        # Arguments kept as attributes.
        self.samplesFile = samplesFile
        self.batchSize = batchSize
        self.binSize = binSize
        self.stdDevThreshold = stdDevThreshold
        self.windowSize = windowSize
        self.recurrentThreshold = recurrentThreshold

        # Initially empty containers.
        self.targets = {}
        self.bbr = BBRepository()
        self.cfg = CFG()

        # Counters, all starting at zero.
        self.numMerge = 0
        self.highStdevEdges = 0
        self.numHighStdevTries = 0
        self.numHighStdevOK = 0
Esempio n. 25
0
 def parse_cfg(self):
     """Read from the stream, return a CFG object.
     This CFG object will be used to run the tasks.

     The section returned by ``self.read_section()`` is laid out as:
     line 1 - comma-separated variables, line 2 - comma-separated
     terminals, line 3 - start symbol, remaining lines - rules of the
     form ``V -> production``.

     Raises:
         StopIteration: if the section has fewer than three lines.
         ValueError: if a rule line does not contain exactly one '->'.
     """
     lines = self.read_section()
     it = iter(lines)
     # variables and terminals are comma separated; all whitespace is
     # dropped. Raw strings keep '\s' a regex escape instead of an invalid
     # string escape (which newer Python versions warn about).
     vas = re.sub(r'\s', '', next(it)).split(',')
     tes = re.sub(r'\s', '', next(it)).split(',')
     # NOTE: the start symbol is taken as-is, without stripping whitespace.
     start = next(it)
     # the remaining lines are rules V -> production
     rules = []
     for line in it:
         v, production = line.split('->')
         # Every whitespace character becomes a single space before the
         # split, so runs of whitespace produce empty-string symbols.
         production = re.sub(r'\s', ' ', production.strip()).split(' ')
         rules.append((v.strip(), production))
     return CFG(vas, tes, start, rules)
Esempio n. 26
0
    def visit_FunctionDecl(self, func):
        """Build the CFG of ``func`` and store it on ``func.cfg``."""
        function_cfg = CFG(func.name)
        func.cfg = function_cfg

        has_table = hasattr(func, 'symbol_table')
        if has_table:
            # Fresh table under the same parent as the function's table,
            # with the function's table embedded into it.
            function_cfg.symbol_table = SymbolTable(func.symbol_table.parent)
            function_cfg.symbol_table.embed(func.symbol_table)
        else:
            function_cfg.symbol_table = SymbolTable()

        tail = self.visit(
            func.body,
            cfg=function_cfg,
            entry=function_cfg.entry,
            exit=function_cfg.exit,
        )
        # A non-None result is the node still to be linked to the exit.
        if tail is not None:
            function_cfg.connect(tail, function_cfg.exit)
Esempio n. 27
0
 def testTest(self):
     """A test node with true/false successors linearises to a branch and two arms."""
     function = FunctionDecl(void_type, 'f', [], Block([]))
     cfg = CFG('f')
     test_node = cfg.add(Test(42))
     cfg.connect(cfg.entry, test_node)
     # Nodes are added in the order test, true arm, false arm; the numeric
     # labels expected below are tied to this construction order.
     true_node = cfg.add(Operation(100))
     cfg.connect(test_node, TrueEdge(), true_node, cfg.exit)
     false_node = cfg.add(Operation(200))
     cfg.connect(test_node, FalseEdge(), false_node, cfg.exit)
     function.cfg = cfg
     program = Program([function])
     linearise = self.assertSuccess(program)
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(linearise.lines, [
         Label('f', public=True),
         Branch(42, 3),
         Label(4),
         Instruction(200),
         Jump('f$exit'),
         Label(3),
         Instruction(100),
         Label('f$exit', public=True)
     ])
Esempio n. 28
0
import csv
import random
import sys
from cfg import CFG
# Grammar from which candidate programs are drawn.
# NOTE(review): the second argument of add_prod looks like a '|'-separated
# list of alternatives - confirm against cfg.CFG.add_prod.
cfg1 = CFG()
cfg1.add_prod('S', 'x|y|( S + S )|( S if B else S )')
cfg1.add_prod('B', ' ( S < S )|( S == S )|( S > S )|True|False')

if __name__ == '__main__':
    # Examples are read from the CSV file named by the first command-line
    # argument; the first three columns of every row are kept.
    # NOTE(review): the file handle is not closed in the code shown here.
    f = open(sys.argv[1])
    reader = csv.reader(f, delimiter=',')
    eg = [(r[0], r[1], r[2]) for r in reader]
    progs_done = 0
    OUTPUT = False
    # NOTE(review): nothing in this part of the script updates progs_done,
    # resets OUTPUT or leaves the loop - check the rest of the file.
    while True:
        p = cfg1.get_next_prog()
        # Verbose output is switched on when the counter is a multiple of 1000.
        if (progs_done % 1000) == 0:
            OUTPUT = True
        if (OUTPUT):
            print("{} Programs evaluated".format(progs_done))
            print("Evaluating program {}".format(p))
        correct = 0
        for e in eg:
            if (OUTPUT):
                print("  Evaluating example {}".format(e))
            # Instantiate the program by plain text substitution of the
            # example's first two columns for 'x' and 'y'.
            p_inst = p.replace('x', e[0])
            p_inst = p_inst.replace('y', e[1])
            if (OUTPUT):
                print("  Evaluation = %d" % eval(p_inst))
            # SECURITY: eval() executes text that contains CSV field contents;
            # only run this script on trusted input files.
            # The example counts as correct when the result equals column 3.
            if (eval(p_inst) == int(e[2])):
                correct += 1
Esempio n. 29
0
 def test_ssa_complex_1(self):
     """Smoke test: SSA conversion of the complex1 CFG must not raise."""
     CFG(complex1_cfg_blocks).convert_to_ssa()
Esempio n. 30
0
import argparse
import torch
import torch.utils.data as data
from loader_voxforge import *
from cfg import CFG

import csv

# NOTE(review): CFG here is imported from the local `cfg` module and is used
# as a configuration object (it exposes .vx, .model and .dl below).
config = CFG()
# The bare string below is disabled code kept as a no-op expression statement.
"""
data_path_predict = "data/presentation"
vx = VOXFORGE(data_path_predict, langs=config.vx.langs,
              ratios=[0., 0., 1.],
              label_type="lang", use_cache=False,
              use_precompute=False)
vx.transform = config.vx.transform
vx.target_transform = config.vx.target_transform
vx = config.vx
vx.set_split("train")
config.vx = vx
dl = data.DataLoader(vx, batch_size=1, shuffle=False)
"""

# Select the "test" split of the dataset.
config.vx.set_split("test")
# Inverse of the target vocabulary: maps each vocab value back to its key.
RLENC = {v: k for (k, v) in config.vx.target_transform.vocab.items()}

model = config.model
# eval() is a method of the model object here, not the builtin
# (presumably torch's switch to inference mode - confirm).
model.eval()
# Counter initialised for the loop that follows.
correct = 0

for i, (mb, tgt) in enumerate(config.dl):
Esempio n. 31
0
 def test_ssa_simple(self):
     """Smoke test: SSA conversion of the simple CFG must not raise."""
     CFG(simple_cfg_blocks).convert_to_ssa()
Esempio n. 32
0
import SATA_PRETREAT
import os
import shutil
import scipy
import argparse
import matplotlib.pyplot as plt
import datetime
import GETPERDATA
from pyfaidx import Fasta
from cfg import CFG

# Command line: a single optional -f argument naming the working folder.
parser = argparse.ArgumentParser()
parser.add_argument('-f', help="the folder input and ouput.")
args = parser.parse_args()

# Project configuration object from the local `cfg` module.
cfg = CFG()

# The items below should be checked and modified if necessary.
OUTPUT = cfg.output
CLINICAL_ITEM = cfg.clinical_item
MIN_ALIVE = cfg.min_alive
VOTE_CLASS = 'Class_4'
VOTE_LINE = 1
CALC_NUM = 10
SATA_LIST = [  # 'ICD_O3_pathology',
    # 'ICD_O3_site',
    # 'ajcc_stage',
    'patient_age',
    # 'patient_gender',
    # 'patient_race',
    'patient_weight'
def buildCFG(cfile, function=None):
    """Build the CFG of one function, or of every function defined in a file.

    Parameters
    ----------
    cfile :
        Value handed to ``buildAST`` to obtain the translation-unit cursor.
    function : str, optional
        Name of the function from which we want to obtain the CFG. When
        omitted, a CFG is built for every function definition found.

    Return
    ------
    `obj`:CFG or None
        When ``function`` is given: the result of ``CFG.buildCFG`` for the
        matching function, or ``None`` when no child of the translation unit
        has that name or the matching cursor has no children.
    list
        When ``function`` is omitted: one ``[name, cfg]`` pair per function
        declaration whose last child is a compound statement.

    """
    # First we need to build the AST
    tu_cursor = buildAST(cfile)

    if function:
        # No break on purpose: when several children share the name, the
        # last one wins.
        func = None
        for e in tu_cursor.get_children():
            if str(e.spelling) == str(function):
                func = e
        # The function is not in the translation unit
        if not func:
            return None

        elm = list(func.get_children())
        if not elm:
            # A cursor without children has no body to build a CFG from;
            # previously this raised IndexError on elm[-1].
            return None
        # The compound stmt is always at the end of the children of a func_decl
        c_stmt = elm[-1]
        # Transform the cursors of the function and the compound stmt to CFG
        # decorators of function and compound stmt.
        # NOTE(review): this path uses Decl and CFG while the whole-unit path
        # below uses FunctionDecl and CFGBuilder - confirm this is intended.
        f_decorator = Decl(func)
        c_stmt_decorator = CompoundStmt(c_stmt)
        cfg = CFG()
        return cfg.buildCFG(f_decorator, c_stmt_decorator)

    cfgs = []
    # One builder is shared by all functions of the translation unit.
    cfg = CFGBuilder()
    for child in tu_cursor.get_children():
        if child.kind is not CursorKind.FUNCTION_DECL:
            continue
        childs = list(child.get_children())
        # Only definitions qualify: the last child must be the compound
        # statement holding the body.
        if childs and childs[-1].kind is CursorKind.COMPOUND_STMT:
            f_decorator = FunctionDecl(child)
            c_stmt_decorator = CompoundStmt(childs[-1])
            cfg_b = cfg.buildCFG(f_decorator, c_stmt_decorator)
            cfgs.append([child.spelling, cfg_b])
    return cfgs
Esempio n. 34
0
class CFGBuilder1(CFGBuilder):
    '''
    CFG builder that reads instruction samples in batches, groups
    instructions into basic blocks and records edges between them in self.cfg.

    NOTE: this class uses Python 2 syntax (print statements, file(),
    generator .next()).

    :type samplesFile: string
    :type batchSize: int
    :type binSize: int
    :type stdDevThreshold: float
    :type windowSize: int
    :type recurrentThreshold: int
    :type targets: dict[int, int]
    :type bbr: BBRepository
    :type cfg: CFG
    '''

    def __init__(self, samplesFile, batchSize, binSize, stdDevThreshold, windowSize, recurrentThreshold):
        '''Store the parameters and create the empty repository, CFG and counters.'''
        CFGBuilder.__init__(self)

        self.stat = Statistics(binSize, stdDevThreshold)
        # 50000 is the first argument given to InputFileBuffer
        # (presumably a buffer size - confirm).
        self.ifb = InputFileBuffer(50000, samplesFile)
        self.batchSize = batchSize
        self.binSize = binSize
        self.stdDevThreshold = stdDevThreshold
        self.samplesFile = samplesFile
        self.windowSize = windowSize
        self.recurrentThreshold = recurrentThreshold
        self.targets = dict()
        self.bbr = BBRepository()
        self.cfg = CFG()
        # Counters reported at the end of buildCFG().
        self.numMerge = 0
        self.highStdevEdges = 0
        self.numHighStdevTries = 0
        self.numHighStdevOK = 0

    def buildCFGR(self, instrGen, justBuild, ib):
        '''
        Consume instructions from instrGen, building basic blocks and edges.

        The generator is shared with the caller and with the recursive calls
        made below, so every instruction taken here is also consumed for them.

        :param instrGen: instruction generator (advanced with .next())
        :param justBuild: 1 to build blocks even for non-recurrent addresses;
            with 0 the method returns at the first address without a bin
        :param ib: instruction batch, used for getInstructionAfter()
        '''
        for i in instrGen:

            b = self.stat.getBinFromAddr(i.pc)

            # No bin for this address: stop unless forced to build.
            if b is None and (justBuild == 0):
                return

            # An address is recurrent when its bin count exceeds the threshold.
            if b is not None:
                recurrent = b.count > self.recurrentThreshold
            else:
                recurrent = False

            if recurrent or justBuild==1:
                # logger.debug("\t is target...")
                # Reuse the block registered at this address or create one.
                bb = self.bbr.getBB(i.pc)
                if not bb:
                    bb = BB(i.pc)
                    self.bbr.addBB(bb)

                # Extend the block until it is marked done or the generator
                # runs out.
                while not bb.done:

                    x = self.bbr.getBB(i.pc)

                    if x:
                        # Reached a different, already known block: close the
                        # current one and link it to that block.
                        if x.entryAddress != bb.entryAddress:
                            bb.done = 1
                            self.numMerge+=1
                            logger.debug("merging blocks %x and %x", bb.entryAddress, x.entryAddress)
                            bb.addTarget(x)
                            x.addSource(bb)
                            self.cfg.addOrIncrementEdge(bb, x)
                            break

                    if not bb.hasInstruction(i.pc):
                        bb.addInstruction(i)

                    # A branch or call ends the block.
                    if i.isBranchOrCall():
                        bb.done = 1

                        iafter = ib.getInstructionAfter(i)

                        if iafter is None:
                            break

                        #logger.debug("i: %s", i)
                        #logger.debug("iafter: %s", iafter)

                        if iafter.pc == i.target:
                            #branch taken

                            #logger.debug("0x%x: branch taken to 0x%x (%s)\n", i.pc, iafter.pc, i.text)

                            # justBuild = 1 if b.count > self.recurrentThreshold else 0
                            #
                            # self.buildCFGR(instrGen, justBuild, ib)

                            # Recurse on the shared generator, then look up
                            # the block at the branch target.
                            self.buildCFGR(instrGen, 0, ib)

                            targetBB = self.bbr.getBB(i.target)
                        else:
                            #branch not taken

                            #logger.debug("0x%x: fallthrough to 0x%x (%s)\n", i.pc, iafter.pc, i.text)

                            self.buildCFGR(instrGen, 0, ib)

                            targetBB = self.bbr.getBB(iafter.pc)

                        # Link only when the successor block exists.
                        if targetBB:
                            bb.addTarget(targetBB)
                            targetBB.addSource(bb)
                            self.cfg.addOrIncrementEdge(bb, targetBB)
                    # Advance manually (Python 2 generator API); i is rebound
                    # for the rest of this iteration of the outer loop.
                    try:
                        i = instrGen.next()
                    except StopIteration:
                        break

            # Runs for every instruction: if the current instruction is a
            # branch or call, link its block to the block of the following
            # instruction when both blocks are known.
            if i.isBranchOrCall():
                iafter = ib.getInstructionAfter(i)
                if iafter is not None:
                    targetBB = self.bbr.getBB(iafter.pc)
                    thisBB = self.bbr.getBB(i.pc)
                    if targetBB and thisBB:
                        thisBB.addTarget(targetBB)
                        targetBB.addSource(thisBB)
                        self.cfg.addOrIncrementEdge(thisBB, targetBB)

    def buildCFG(self):
        '''
        Process the samples file batch by batch, then dump the CFG and statistics.

        Side effects: calls self.cfg.toDot with "test_builder1.dot", prints
        the CFG and summary figures, and writes one mean window standard
        deviation per batch to "<samplesFile>.stdev".
        '''

        # ignore the first line in the samples file
        self.ifb.getLine()

        moreBatches = not self.ifb.eof

        # NOTE(review): commonBinIns and totalIns are never used below.
        commonBinIns = dict()
        totalIns = dict()

        # Number of batches at or below / above the stdDevThreshold.
        lowstdev = 0
        highstdev = 0

        stddevs = []

        while (moreBatches):

            ib = InstructionBatch(self.batchSize, self.ifb)
            moreBatches = ib.fromFile()
            ib.calcStatistics(self.windowSize, 1)
            self.stat.registerLowStDevStatistics(ib)

            stddevs.append(ib.meanWindowStdev)

            if ib.batchId % 100 == 0:
                logger.debug("batch %d", ib.batchId)

            if ib.meanWindowStdev <= self.stdDevThreshold:
                # Low-deviation batch: build blocks from all its instructions.
                instrGen = ib.genInstruction()
                self.buildCFGR(instrGen, 0, ib)
                lowstdev+=1
            else:
                highstdev+=1

                # NOTE(review): printedIns is never used below.
                printedIns = False
                instrGen = ib.genInstruction()

                # High-deviation batch: only look at branches/calls that sit
                # in an already known block with a recurrent bin.
                for i in instrGen:

                    if i.isBranchOrCall():

                        b = self.stat.getBinFromAddr(i.pc)

                        if b is None:
                            continue

                        bb = self.bbr.getBB(i.pc)

                        if bb and b.count > self.recurrentThreshold:
                            iafter = ib.getInstructionAfter(i)
                            if iafter is not None:
                                otherBB = self.bbr.getBB(iafter.pc)
                                if not otherBB:
                                    # Successor unknown: force a build from
                                    # the shared generator and look again.
                                    logger.debug("trying to create a new BB for %x", iafter.pc)
                                    self.buildCFGR(instrGen, 1, ib)
                                    self.numHighStdevTries+=1
                                    otherBB = self.bbr.getBB(iafter.pc)
                                    if otherBB:
                                        logger.debug("got it!")
                                        self.numHighStdevOK+=1

                                if otherBB:
                                    bb.addTarget(otherBB)
                                    otherBB.addSource(bb)
                                    self.cfg.addOrIncrementEdge(bb, otherBB)
                                    self.highStdevEdges+=1

        self.cfg.toDot("test_builder1.dot", True, False)
        self.cfg.printCFG()

        print len(self.bbr.blocks), " basic blocks were recognized"
        print len(self.stat.bins), " address bins were created"
        print lowstdev, " low standard deviation batches"
        print highstdev, " high standard deviation batches"

        # Instruction count of every recognised basic block.
        totalBBIns = []
        for i in self.bbr.blocks:
            totalBBIns.append(len(self.bbr.blocks[i].instructions))

        print "each block has an average of ",  np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions"
        print "number of basic block merges: ", self.numMerge
        print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges
        print "number of high standard deviation basic block build tries: ", self.numHighStdevTries
        print "number of high standard deviation basic block actually built: ", self.numHighStdevOK

        # TODO: implement metrics: number of instructions and basic blocks per function

        # One line per batch: its mean window standard deviation.
        stdev = file(self.samplesFile + ".stdev", 'w')
        for i in stddevs:
            stdev.write(str(i) + "\n")
        stdev.close()
Esempio n. 35
0
    def test_follow(self):
        """Check ``CFG.follow`` against hand-computed sets for five grammars."""
        # Grammar 1: S -> A B C with recursive A, B and C.
        grammar = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'A B C'},
                'A': {'a A', '&'},
                'B': {'b B', 'A C d'},
                'C': {'c C', '&'},
            },
        )
        for symbol, expected in (
            ('S', {'$'}),
            ('A', {'a', 'b', 'c', 'd'}),
            ('B', {'c', '$'}),
            ('C', {'d', '$'}),
        ):
            self.assertSetEqual(expected, grammar.follow(symbol))

        # Grammar 2: arithmetic expressions with primed helper symbols.
        grammar = CFG.create(
            initial_symbol='E',
            productions={
                'E': {"T E'"},
                "E'": {"+ T E'", '&'},
                'T': {"F T'"},
                "T'": {"* F T'", '&'},
                'F': {'( E )', 'id'}
            },
        )
        for symbol, expected in (
            ('E', {')', '$'}),
            ("E'", {')', '$'}),
            ('T', {'+', ')', '$'}),
            ("T'", {'+', ')', '$'}),
            ("F", {'*', '+', ')', '$'}),
        ):
            self.assertSetEqual(expected, grammar.follow(symbol))

        # Grammar 3: a single symbol that recurses between matching terminals.
        grammar = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'a S a', 'b S b', 'a', 'b'},
            },
        )
        self.assertEqual({'a', 'b', '$'}, grammar.follow('S'))

        # Grammar 4: A and B only derive '&' and are used twice per production.
        grammar = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'A a A b', 'B b B a'},
                'A': {'&'},
                'B': {'&'},
            },
        )
        for symbol in ('A', 'B'):
            self.assertEqual({'a', 'b'}, grammar.follow(symbol))

        # Grammar 5: S is right-recursive and can derive '&'.
        grammar = CFG.create(
            initial_symbol='S',
            productions={
                'S': {'a B S', 'b A S', '&'},
                'A': {'a', 'b A A'},
                'B': {'b', 'a B B'},
            },
        )
        for symbol, expected in (
            ('S', {'$'}),
            ('A', {'$', 'a', 'b'}),
            ('B', {'$', 'a', 'b'}),
        ):
            self.assertSetEqual(expected, grammar.follow(symbol))
Esempio n. 36
0
    def optimise(self):
        """
        Optimise the IR and store the result in ``self.flat``.

        Procedure:
         1. optimise the control flow graph of the whole program
         2. optimise jumps and branches on the flat representation
         3. split the flat representation in frames
         4. convert frames to graphs
         5. optimise every basic block (constant folding, copy
            propagation, dead code removal) until a full pass reports
            no change
         6. convert graphs to (flat) frames
         7. concatenate frames to get the optimised program.

        The counters ``self.stats['cf']``, ``self.stats['cp']`` and
        ``self.stats['dc']`` are increased by the matching per-block
        optimiser counter each time that optimiser reports a change.

        """
        self.logger.info('optimising global control flow graph')

        cfg = CFG(self.flat)
        if self.verbosity > 2:
            cfg.cfg_to_diagram("allinstr_graph_before.png")
        optimise_tree.optimise(cfg)
        if self.verbosity > 2:
            cfg.cfg_to_diagram("allinstr_graph_after.png")
        self.flat = cfg.cfg_to_flat()

        self.logger.info('optimising flat (jumps and branches)')
        self.flat = flat_opt.optimise(self.flat)

        self.logger.info('splitting flat in frames')
        frames = split_frames(self.flat)
        self.logger.info('creating graph for each frame')
        graphs = [CFG(frame) for frame in frames]

        self.logger.info('optimising blocks')

        for graphnr, graph in enumerate(graphs):
            self.logger.info('graph %d of %d' % (graphnr + 1, len(graphs)))

            # NOTE(review): neither result is used below; presumably both
            # constructors annotate the graph in place -- confirm.
            Dataflow(graph)
            Liveness(graph, self.verbosity)
            # The liveness-driven optimisation loop (analyse()/optimise()
            # until stable) is currently disabled for this pipeline.

            for blocknr, block in enumerate(graph.blocks):
                self.logger.debug('block %d of %d' % (blocknr + 1, len(graph.blocks)))

                # (stats key, optimiser) pairs, applied in this order on
                # every pass over the block.
                passes = (
                    ('cf', b_opt.ConstantFold(block)),
                    ('cp', b_opt.CopyPropagation(block)),
                    ('dc', b_opt.DeadCode(block)),
                )

                i = 0
                changed = True
                while changed:
                    changed = False
                    i += 1
                    self.logger.debug('pass '+str(i))

                    # Every optimiser runs on every pass; another pass is
                    # scheduled as soon as any of them reports a change.
                    for key, opt in passes:
                        if opt.optimise():
                            self.stats[key] += opt.stats[key]
                            changed = True

        self.logger.info('basic-block peephole optimisations done:')
        self.logger.info('\t\tconstant folds: %d' % (self.stats['cf']))
        self.logger.info('\t\tcopy propagations: %d' % (self.stats['cp']))
        self.logger.info('\t\tdead code removes: %d' % (self.stats['dc']))
        self.logger.info('joining graphs to frames')
        frames = [graph.cfg_to_flat() for graph in graphs]
        self.logger.info('joining frames to flat')
        # Flatten in a single pass; sum(frames, []) copies the growing
        # accumulator for every frame, which is quadratic.
        self.flat = [instr for frame in frames for instr in frame]
Esempio n. 37
0
    def optimise(self):
        """
        Optimise the IR until it is stable and store the result in
        ``self.flat``.

        The whole pipeline below is repeated ("top loop") until a pass
        leaves ``self.flat`` unchanged or the pass limit is reached.
        Individual steps run only when their letter is present in
        ``self.enabled_optimisations``:

         a. optimise the control flow graph of the whole program
         b. optimise jumps and branches on the flat representation
         f. liveness optimisation per graph (repeated until it reports
            no change)
         c. constant folding per basic block
         d. copy propagation per basic block
         e. dead code removal per basic block

        Procedure of one top pass:
         1. run a. and b. on the whole program
         2. parse the flat representation and split it in frames
         3. convert frames to graphs
         4. optimise graphs (f.) and their blocks (c., d., e.) until a
            full block pass reports no change
         5. convert graphs to (flat) frames
         6. concatenate frames to get the optimised program.

        The counters ``self.stats['cf']``, ``self.stats['cp']`` and
        ``self.stats['dc']`` are increased by the matching per-block
        optimiser counter each time that optimiser reports a change.

        """
        # Safety net against a pipeline that never reaches a fixed point.
        max_top_passes = 10000

        flat_orig = None
        top_loop_counter = 0
        while True:
            if flat_orig == self.flat:
                self.logger.info('optimisation is stable')
                break
            if top_loop_counter == max_top_passes:
                self.logger.warning('top loop limit reached (%d iterations)' % max_top_passes)
                break
            # Shallow copy, so the comparison at the top of the next
            # iteration is made against this pass's starting point.
            flat_orig = self.flat[:]
            top_loop_counter += 1
            self.logger.info('top pass %s' % str(top_loop_counter))

            # a. global control flow graph
            if 'a' in self.enabled_optimisations:
                self.logger.info('optimising global control flow graph')
                cfg = CFG(self.flat)
                optimise_tree.optimise(cfg)
                self.flat = cfg.cfg_to_flat()

            # b. jump optimisations
            if 'b' in self.enabled_optimisations:
                self.logger.info('optimising flat (jumps and branches)')
                self.flat = flat_opt.optimise(self.flat)

            self.flat = parse_instr.parse(self.flat)
            self.logger.info('splitting flat in frames')
            frames = split_frames(self.flat)
            self.logger.info('creating graph for each frame')
            graphs = [CFG(frame) for frame in frames]

            self.logger.info('optimising blocks')

            for graphnr, graph in enumerate(graphs):
                self.logger.info('graph %d of %d' % (graphnr + 1, len(graphs)))

                # f. liveness optimisation, repeated until no change
                if 'f' in self.enabled_optimisations:
                    liveness = Liveness(graph, self.verbosity)
                    self.logger.info('Performing liveness optimalisation on graph')
                    change = True
                    while change:
                        liveness.analyse()
                        change = liveness.optimise()

                for blocknr, block in enumerate(graph.blocks):
                    self.logger.debug('block %d of %d' % (blocknr + 1, len(graph.blocks)))

                    # (enabling letter, stats key, optimiser), applied in
                    # this order on every pass: c. constant folding,
                    # d. copy propagation, e. dead code removal.
                    passes = (
                        ('c', 'cf', b_opt.ConstantFold(block)),
                        ('d', 'cp', b_opt.CopyPropagation(block)),
                        ('e', 'dc', b_opt.DeadCode(block)),
                    )

                    i = 0
                    changed = True
                    while changed:
                        changed = False
                        i += 1
                        self.logger.debug('\t pass '+str(i))

                        # Every enabled optimiser runs on every pass;
                        # another pass is scheduled as soon as any of
                        # them reports a change.
                        for flag, key, opt in passes:
                            if flag in self.enabled_optimisations and opt.optimise():
                                self.stats[key] += opt.stats[key]
                                changed = True

            self.logger.info('basic-block peephole optimisations done:')
            self.logger.info('\t constant folds: %d' % (self.stats['cf']))
            self.logger.info('\t copy propagations: %d' % (self.stats['cp']))
            self.logger.info('\t dead code removes: %d' % (self.stats['dc']))

            self.logger.info('joining graphs to frames')
            frames = [graph.cfg_to_flat() for graph in graphs]
            self.logger.info('joining frames to flat')
            # Flatten in a single pass; sum(frames, []) copies the growing
            # accumulator for every frame, which is quadratic and would be
            # paid again on every top pass.
            self.flat = [instr for frame in frames for instr in frame]
Esempio n. 38
0
from cfg import T, NT, CFG

# Grammar symbols: three non-terminals and the terminals 'a' and 'b'.
Z, P, M = NT(), NT(), NT()
a, b = T('a'), T('b')

# Productions as (head, body) pairs; Z is passed to CFG as first argument.
rules = [
    (Z, (a, P)),
    (Z, (b, M)),
    (P, (b,)),
    (P, (a, P, P)),
    (M, (a,)),
    (M, (b, M, M)),
]
g = CFG(Z, rules).kleene()
g_ = g.chomsky_normal_form()

# Print the membership result for two sample words.
for word in ('baabbbaaabba', 'baabbaaabba'):
    print(g_.accepts(word))

# Print the pumping-lemma decomposition, then the word obtained by
# repeating v and x i times, for i = 0..19.
pl = g.get_pumping_lemma()
print(pl)
u, v, w, x, y = pl
for i in range(20):
    print("".join(map(str, (u, v * i, w, x * i, y))))