def test_iter(self):
    """Check the words obtained by iterating over several grammars.

    The first two grammars are enumerated completely; the last two are
    recursive and are only sampled through ``next``.
    """
    var, term, rule = Variable, Terminal, CFGRule

    # B and C have no production: the only expected word is "a".
    grammar = CFG(
        [var("S"), var("A"), var("B"), var("C")],
        [term("a"), term("b")],
        [
            rule(var("S"), [var("A"), var("B")]),
            rule(var("S"), [term("a")]),
            rule(var("A"), [term("b")]),
            rule(var("A"), [var("C")]),
        ],
        var("S"))
    produced = [word for word in grammar]
    self.assertEqual(produced, [[term("a")]])

    # S -> A and A -> b add a second word, "b".
    grammar = CFG(
        [var("S"), var("A"), var("C")],
        [term("a"), term("b")],
        [
            rule(var("S"), [var("A")]),
            rule(var("S"), [term("a")]),
            rule(var("A"), [term("b")]),
            rule(var("A"), [var("C")]),
        ],
        var("S"))
    produced = [word for word in grammar]
    self.assertIn([term("a")], produced)
    self.assertIn([term("b")], produced)
    self.assertEqual(len(produced), 2)

    # S -> S S | a: each of the first five words uses the symbol "a" only.
    grammar = CFG(
        [var("S"), var("A"), var("B")],
        [term("a"), term("b")],
        [
            rule(var("S"), [var("S"), var("S")]),
            rule(var("S"), [term("a")]),
            rule(var("A"), [term("b")]),
        ],
        var("S"))
    stream = iter(grammar)
    for _ in range(5):
        current = next(stream)
        self.assertEqual(set(current), {term("a")})

    # S -> A S | a with A -> b: "b b b a" must be among the first 100 words.
    grammar = CFG(
        [var("S"), var("A"), var("B")],
        [term("a"), term("b")],
        [
            rule(var("S"), [var("A"), var("S")]),
            rule(var("S"), [term("a")]),
            rule(var("A"), [term("b")]),
        ],
        var("S"))
    stream = iter(grammar)
    first_hundred = [next(stream) for _ in range(100)]
    self.assertIn(
        [term("b"), term("b"), term("b"), term("a")],
        first_hundred)
def test_emptiness(self):
    """Check ``get_reachable`` and ``is_empty`` for two start symbols.

    The same productions are used twice; only the start symbol changes.
    With start ``S`` the grammar must not be empty, with start ``B`` it
    must be. ``get_reachable`` is expected to return the same set in both
    cases.
    """
    var, term, rule = Variable, Terminal, CFGRule

    for start_name, should_be_empty in (("S", False), ("B", True)):
        grammar = CFG(
            [var("S"), var("A"), var("B")],
            [term("a"), term("b")],
            [
                rule(var("S"), [var("A"), var("B")]),
                rule(var("S"), [term("a")]),
                rule(var("A"), [term("b")]),
            ],
            var(start_name))
        self.assertEqual(
            {var("S"), var("A"), term("a"), term("b")},
            grammar.get_reachable())
        if should_be_empty:
            self.assertTrue(grammar.is_empty())
        else:
            self.assertFalse(grammar.is_empty())
def construct(self):
    """Build a CFG for every function reachable through direct calls.

    Starting at ``self.start_addr``, a ``CFG`` is built for each function
    address and stored in ``self.cfgs``. The direct call sites found in it
    are merged into ``self.calls`` (call address -> target address) and the
    targets that have no CFG yet are queued for analysis.
    """

    def extract_calls(cfg):
        """Return ``{call address: target address}`` for direct calls in *cfg*."""
        calls = {}
        visited = set()
        work_list = [cfg.start]
        while work_list:
            op = work_list.pop()
            # An op can be queued by several predecessors before its first
            # visit; handle it only once.
            if op in visited:
                continue
            visited.add(op)

            # indirect calls (call %rcx, etc) not currently supported
            if op.id == X86_INS_CALL and op.operands[0].type == X86_OP_IMM:
                calls[op.address] = op.operands[0].imm

            for succ in op.succs:
                succ_op = cfg[succ]
                if succ_op not in visited:
                    work_list.append(succ_op)
        return calls

    # Addresses already placed on the work list during this run, so that a
    # callee shared by several callers is analysed a single time.
    queued = {self.start_addr}
    worklist = [self.start_addr]
    while worklist:
        func_addr = worklist.pop()
        func_cfg = CFG(self.elf, func_addr)
        func_calls = extract_calls(func_cfg)

        self.cfgs[func_addr] = func_cfg
        self.calls.update(func_calls)

        new_targets = set(func_calls.values()) - set(self.cfgs) - queued
        queued |= new_targets
        worklist.extend(new_targets)
def test_intersect(self):
    """Intersect one grammar with several regular expressions.

    For each regex the resulting grammar is checked with ``is_empty``:
    "a", "b" and "b,b" must give a non-empty result, "b,a" an empty one.
    """
    grammar = CFG(
        [Variable("S"), Variable("A")],
        [Terminal("a"), Terminal("b")],
        [
            CFGRule(Variable("S"), [Variable("A"), Variable("A")]),
            CFGRule(Variable("S"), [Terminal("a")]),
            CFGRule(Variable("A"), [Terminal("b")]),
        ],
        Variable("S"))

    cases = (
        ("a", False),
        ("b", False),
        ("b,b", False),
        ("b,a", True),
    )
    for pattern, should_be_empty in cases:
        automaton = RegexTree(Node(pattern)).to_fsm()
        automaton.close()
        intersection = grammar.intersect(automaton)
        if should_be_empty:
            self.assertTrue(intersection.is_empty())
        else:
            self.assertFalse(intersection.is_empty())
def test_to_pda(self):
    """Convert an expression grammar to a PDA and inspect the result.

    Checks the type of the result, the presence of three transitions, the
    total number of transitions (18) and the acceptance of a few words by
    empty stack.
    """
    var, term, rule = Variable, Terminal, CFGRule

    grammar = CFG(
        [var("E"), var("I")],
        [
            term("a"), term("b"), term("0"), term("1"),
            term("+"), term("*"), term("("), term(")"),
        ],
        [
            rule(var("I"), [term("a")]),
            rule(var("I"), [term("b")]),
            rule(var("I"), [var("I"), term("a")]),
            rule(var("I"), [var("I"), term("b")]),
            rule(var("I"), [var("I"), term("0")]),
            rule(var("I"), [var("I"), term("1")]),
            rule(var("E"), [var("I")]),
            rule(var("E"), [var("E"), term("*"), var("E")]),
            rule(var("E"), [var("E"), term("+"), var("E")]),
            rule(var("E"), [term("("), var("E"), term(")")]),
        ],
        var("E"))

    pda = grammar.to_PDA()
    self.assertIsInstance(pda, PDA)

    # Transitions that must exist: (symbol popped, symbols pushed).
    expected_transitions = (
        (var("I"), [term("a")]),
        (var("I"), [var("I"), term("0")]),
        (var("E"), [var("I")]),
    )
    for popped, pushed in expected_transitions:
        self.assertIn(
            PDATransitionFunction(PDAState("q"), "epsilon", popped,
                                  PDAState("q"), pushed),
            pda.transition_function)
    self.assertEqual(18, len(pda.transition_function))

    # (word, second argument of accepts_by_empty_stack, expected result).
    # NOTE(review): the second argument looks like a search bound, since
    # "b0" is rejected with 100 and accepted with 1000 -- confirm.
    acceptance_cases = (
        ("a", 100, True),
        ("b", 100, True),
        ("b0", 100, False),
        ("b0", 1000, True),
        ("b00", 10000, True),
    )
    for text, bound, accepted in acceptance_cases:
        outcome = pda.accepts_by_empty_stack([term(x) for x in text], bound)
        if accepted:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
def to_CFG(self, preprocess=False):
    """Build a ``CFG`` from the transitions of this object.

    Args:
        preprocess: when true, the object returned by
            ``self.__preprocess_to_CFG()`` is converted instead, with
            ``preprocess=False``.

    Returns:
        A ``CFG`` with start symbol ``Variable("S")``, one terminal per
        entry of ``self.input_symbols``, and the productions produced by
        ``__init_productions_to_CFG`` and ``__process_transition_to_CFG``.
    """
    if preprocess:
        return self.__preprocess_to_CFG().to_CFG(False)

    # Reset the per-conversion cache; it is read back below to collect the
    # variables of the grammar once all transitions are processed.
    self.__cache_variables = dict()
    self.__cache_counter = 0

    # Imported here rather than at module level, as in the original code.
    from CFG import CFG

    start = Variable("S")
    terminals = list(map(Terminal, self.input_symbols))
    productions = self.__init_productions_to_CFG(start)
    for trans in self.transition_function:
        self.__process_transition_to_CFG(trans, productions)

    variables = list(self.__cache_variables.values())
    variables.append(start)
    return CFG(variables, terminals, productions, start)
def test_repr(self):
    """Check the content of the string representation of a grammar.

    The text must mention every symbol and the four section names, contain
    exactly one "->" and exactly four newline characters.
    """
    grammar = CFG(
        [Variable("I"), Variable("J")],
        [Terminal("a"), Terminal("b")],
        [CFGRule(Variable("I"), [Terminal("a"), Variable("I")])],
        Variable("I"))
    text = str(grammar)

    expected_fragments = (
        str(Variable("I")),
        str(Variable("J")),
        str(Terminal("a")),
        str(Terminal("b")),
        "Variables",
        "Terminals",
        "Productions",
        "Start",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, text)

    self.assertEqual(text.count("->"), 1)
    self.assertEqual(text.count("\n"), 4)
def train(cfg_file, train_file, iter_num=20, gen_num=2000):
    """Run EM on a grammar and write the learned rules and sample sentences.

    Two files are written next to ``train_file`` (same path, extension
    replaced): ``<name>.pcfg`` with one line per binary and unary rule
    followed by the value stored for it in the EM result, and
    ``<name>.gen`` with sentences generated from the symbol ``'S'``.

    Args:
        cfg_file: passed to ``CFG(cfg_file=...)``.
        train_file: passed to ``PCFG_EM(train_file=...)``; also determines
            the output file names.
        iter_num: passed to ``PCFG_EM.EM(iter_num=...)``.
        gen_num: number of sentences written to the ``.gen`` file.
    """
    cfg = CFG(cfg_file=cfg_file)
    pcfg = PCFG_EM(train_file=train_file, CFG=cfg)
    name, _ = os.path.splitext(train_file)
    state = pcfg.EM(iter_num=iter_num)

    with open(name + '.pcfg', 'w') as f:
        for (A, B, C) in cfg.binary_rules:
            f.write('{} -> {} {} {}\n'.format(A, B, C, state.get((A, B, C))))
        for (A, w) in cfg.unary_rules:
            f.write('{} -> {} {}\n'.format(A, w, state.get((A, w))))

    with open(name + '.gen', 'w') as f:
        for _ in range(gen_num):
            f.write(pcfg.gen_sentence('S') + '\n')
def __init__(self):
    """Connect to the database described in the settings file and open the log.

    The settings are read through ``CFG('../../cfg/farmersmarket.yaml')``;
    the log is written to ``'../../log/api.log'``.
    """
    settings = CFG('../../cfg/farmersmarket.yaml')
    db_settings = settings.data['db']
    self._db = DB(
        db_settings['type'],
        db_settings['host'],
        db_settings['user'],
        db_settings['pass'],
        db_settings['dbname'],
    )
    self._db.connect()

    # Set log file
    self._log = LOG('../../log/api.log')
    self._log.logger.info('Opening connection to the database')
def __init__(self, argv):
    """Connect to the database, remember the CSV path and open the log.

    Args:
        argv: argument list; ``argv[1]`` is stored as the CSV file path.
    """
    settings = CFG('../cfg/farmersmarket.yaml')
    db_settings = settings.data['db']

    # Open connection database
    self._db = DB(
        db_settings['type'],
        db_settings['host'],
        db_settings['user'],
        db_settings['pass'],
        db_settings['dbname'],
    )
    self._db.connect()

    # Set CSV file path
    self._file = argv[1]

    # Set log file
    self._log = LOG('../log/export-CSV2DB.log')
    self._log.logger.info('Opening connection to the database')
def test_paper(self):
    """Intersect a bracket-like grammar with two regular expressions.

    The first regex must give a non-empty intersection, the second one
    (which differs only in its middle alternative) an empty one.
    """
    var, term, rule = Variable, Terminal, CFGRule

    grammar = CFG(
        [var("S"), var("C")],
        [
            term("a"), term("b"), term("c"), term("q"),
            term("am"), term("bm"), term("cm"), term("qm"),
        ],
        [
            rule(var("S"), [var("C"), term("q"), var("C")]),
            rule(var("C"), [term("a"), var("C"), term("am")]),
            rule(var("C"), [term("b"), var("C"), term("bm")]),
            rule(var("C"), [term("c"), var("C"), term("cm")]),
            rule(var("C"), [term("q"), var("C"), term("qm")]),
            rule(var("C"), [var("C"), var("C")]),
            rule(var("C"), []),
        ],
        var("S"))

    cases = (
        ("(a,b)|(bm,c)|(cm,am,q)", False),
        ("(a,b)|(b,c)|(cm,am,q)", True),
    )
    for pattern, should_be_empty in cases:
        automaton = RegexTree(Node(pattern)).to_fsm()
        automaton.close()
        intersection = grammar.intersect(automaton)
        if should_be_empty:
            self.assertTrue(intersection.is_empty())
        else:
            self.assertFalse(intersection.is_empty())
def train_DQN():
    """Train the DQN model on the game using a replay memory.

    The replay memory is first filled with ``cfg.observations`` random
    steps. Then, for each of ``cfg.max_episode`` episodes, the game is
    played step by step; after every step one minibatch is sampled from the
    replay memory and used for a single optimizer update. An episode ends
    when the game reports a terminal frame. ``test_DQN`` is called every
    100 episodes.
    """
    # init configurations
    cfg = CFG()

    # DQN brain
    dqn = DQNBrain(cfg)
    if cfg.use_cuda:
        dqn = dqn.cuda()

    # game start
    flappyBird = game.GameState()

    # set optimizer
    optimizer = torch.optim.RMSprop(dqn.parameters(), lr=cfg.lr)
    criterion = nn.MSELoss()

    # init replay memory by random action
    for _ in range(cfg.observations):
        action = dqn.get_action_randomly()
        o, r, terminal = flappyBird.frame_step(action)
        o = preprocess(o)
        dqn.store_transition(o, action, r, terminal)

    for episode in range(cfg.max_episode):
        while True:
            optimizer.zero_grad()

            # NOTE(review): exploration is gated on cfg.epsilon, whereas the
            # decay at the end of the episode updates dqn.epsilon -- confirm
            # which of the two is meant to drive the epsilon-greedy policy.
            if random.random() <= cfg.epsilon:
                action = dqn.get_action_randomly()
            else:
                action = dqn.get_action_optim()
            o_next, r, terminal = flappyBird.frame_step(action)
            o_next = preprocess(o_next)

            # update replay memory
            dqn.store_transition(o_next, action, r, terminal)
            dqn.increase_step()

            # train dqn brain model by one batch
            # step 1: sample training data from replay memory
            minibatch = random.sample(dqn.replayMemory, cfg.batch_size)
            state_batch = np.array([data[0] for data in minibatch])
            action_batch = np.array([data[1] for data in minibatch])
            reward_batch = np.array([data[2] for data in minibatch])
            next_state_batch = np.array([data[3] for data in minibatch])
            state_batch_var = Variable(torch.from_numpy(state_batch))
            next_state_batch_var = Variable(torch.from_numpy(next_state_batch))
            if cfg.use_cuda:
                state_batch_var = state_batch_var.cuda()
                next_state_batch_var = next_state_batch_var.cuda()

            # step 2: get label y
            q_value = dqn.forward(state_batch_var)
            q_value_next = dqn.forward(next_state_batch_var)
            y_batch = reward_batch.astype(np.float32)
            max_q, _ = torch.max(q_value_next, dim=1)
            # Q-learning target: y = r for a terminal transition, otherwise
            # y = r + gamma * max_a' Q(s', a').
            for i in range(cfg.batch_size):
                if not minibatch[i][4]:  # terminal
                    y_batch[i] += cfg.gamma * float(max_q.data[i])
            y_batch = Variable(torch.from_numpy(y_batch))
            action_batch_var = Variable(torch.from_numpy(action_batch))  # predict action
            if cfg.use_cuda:
                y_batch = y_batch.cuda()
                action_batch_var = action_batch_var.cuda()
            # predict value: keep only the Q-value of the action taken
            q_value = torch.sum(torch.mul(action_batch_var, q_value), dim=1)

            # step 3: bp to update model
            loss = criterion(q_value, y_batch)
            loss.backward()
            optimizer.step()

            # end episode when bird's dead
            if terminal:
                dqn.time_step = 0
                break

        # update epsilon
        if dqn.epsilon > cfg.final_e:
            delta = (cfg.init_e - cfg.final_e) / cfg.exploration
            dqn.epsilon -= delta

        # test dqn per 100 episode
        if episode % 100 == 0:
            test_DQN(dqn, episode)
def test_creation(self):
    """A grammar built from empty collections is still a ``CFG`` instance."""
    empty_grammar = CFG([], [], [], Variable(""))
    self.assertIsInstance(empty_grammar, CFG)
def __init__(self, grammarFile):
    """Start with an empty tree and load the grammar from *grammarFile*.

    Args:
        grammarFile: passed to ``CFG``; the result of ``get_grammar()`` is
            kept in ``self.cfg``.
    """
    self.tree = []
    self.cfg = CFG(grammarFile).get_grammar()
def __init__(self, filename, func='main'):
    """Load an ELF file and prepare the analysis of one of its functions.

    Args:
        filename: path handed to ``ELF``.
        func: symbol name looked up in ``elf.sym`` to obtain the address
            at which the CFG is built.
    """
    binary = ELF(filename)
    entry_addr = binary.sym[func]
    self.cfg = CFG(binary, entry_addr)
    # The path list starts with a single state built from the CFG.
    self.paths = [SymState(self.cfg, binary)]
# NOTE(review): the statements down to `return new_state` are the tail of a
# definition whose header is outside this excerpt; `op`, `state` and
# `new_state` come from that missing part, and the real indentation of these
# lines is unknown.
dst, src = op.operands
op_1 = state.read_value(dst)
op_2 = state.read_value(src)
# The bitwise AND is only evaluated when both operands are Immediate values;
# otherwise the result is recorded as Unknown.
if isinstance(op_1, Immediate) and isinstance(op_2, Immediate):
    val = Immediate(op_1.value & op_2.value)
else:
    val = Unknown()
new_state.store_value(dst, val)
return new_state

# Script entry point: run the constant analysis on `main` of the given file
# and print, for every op, the state recorded before it.
if __name__ == '__main__':
    import sys
    # Exactly one command-line argument (the file to analyse) is expected.
    if len(sys.argv) != 2:
        print('Usage: {} <file>'.format(sys.argv[0]))
        sys.exit()
    e = ELF(sys.argv[1])
    main_addr = e.symbols['main']
    cfg = CFG(e, main_addr)
    # Entry state: the RSP register holds StackPointer(0).
    start = MachineState()
    start.regs[X86_REG_RSP] = StackPointer(0)
    vars = ConstantAnalysis(cfg, entry_state=start)
    # Ops are printed in increasing address order.
    for op_addr in sorted(cfg.ops):
        op = cfg.ops[op_addr]
        print('{:120s} -- {}'.format(vars.before_states[op], op_str(op)))
def main():
    """Command-line entry point of the MFS/CFG manipulation utility.

    Parses the options, reads either the MFS file (``--mfs``) or the CFG
    file (``--cfg``) and performs the single requested action (``--dump``,
    ``--zip``, ``--extract``, ``--add`` or ``--remove``). Results are
    written to ``--output`` (default ``'-'``). On error a message is
    printed and the process exits with ``sys.exit(-1)``.

    NOTE: this function uses Python 2 syntax (print statement, ``xrange``,
    ``0644``-style octal literals).
    """
    parser = argparse.ArgumentParser(
        description='MFS and CFG file manipulation utility.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=""" The default output is to stdout. Either one of --mfs or --cfg must be specified to indicate on which \ type of file to work (MFS or CFG). You can specify one of the mutually exclusive actions : \ --dump --zip, --extract, --add, --remove. For the --extract, --add, --remove actions, if --mfs is specified, \ then --file-id is required, if --cfg is specified, then --file-path is required. When adding a file to a CFG file, the --mode, --opt, --uid and --gid options can be added. The --mode option needs to be a string in the form 'dAEIrwxrwxrwx' where \ unused bits can be either a space or a dash, like --mode ' rwx---rwx' for example. The --opt option needs to be a string in the form '?!MF' where unused bits can be \ either a space or a dash. When adding a directory, both the file path needs to end with a '/' character and the --mode needs to start with 'd'. 
""")

    # Options shared by several actions.
    parser.add_argument("-o", "--output", dest="output", default='-',
                        help="Output file to write", metavar="FILE")
    parser.add_argument("-i", "--file-id", dest="file_id", type=int,
                        help="ID of the file to manipulate in the MFS file",
                        metavar="ID")
    parser.add_argument("-f", "--file-path", dest="file_path",
                        help="Path of the file to manipulate in the CFG file",
                        metavar="PATH")
    # Options only used when adding a file to a CFG file.
    parser.add_argument("--mode", dest="mode", default="---rwxrwxrwx",
                        help="Mode for file being added to CFG",
                        metavar="MODE")
    parser.add_argument("--opt", dest="opt", default="----",
                        help="Deplyoment option for file being added to CFG",
                        metavar="OPT")
    parser.add_argument("--uid", dest="uid", default=0, type=int,
                        help="User ID for file being added to CFG",
                        metavar="UID")
    parser.add_argument("--gid", dest="gid", default=0, type=int,
                        help="Group ID for file being added to CFG",
                        metavar="GID")
    parser.add_argument("--recursive", dest="recursive", action="store_true",
                        help="Recursive deletion for a file path in CFG")
    parser.add_argument("--alignment", dest="alignment", type=int, default=0,
                        help="Alignment type for CFG files. (default: 0).\n"
                        "0 : packed.\n"
                        "1 : align all files on chunk start.\n"
                        "2 : align end of files on end of chunk.")
    # store_false: args.optimize is True unless --deoptimize is given.
    parser.add_argument(
        "--deoptimize", dest="optimize", action="store_false",
        help="De-optimize chain sequences when adding a file to MFS.")

    # Exactly one input kind: MFS or CFG.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-m", "--mfs", dest="mfs",
                       type=argparse.FileType('rb'),
                       help="MFS file to read from", metavar="FILE")
    group.add_argument("-c", "--cfg", dest="cfg",
                       type=argparse.FileType('rb'),
                       help="CFG file to read from", metavar="FILE")

    # Exactly one action.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-d", "--dump", dest='dump', action="store_true",
        help="Dump information about the MFS file, or the CFG file")
    group.add_argument("-z", "--zip", dest='zip', action="store_true",
                       help="Store the MFS contents to a ZIP file")
    group.add_argument(
        "-x", "--extract", dest='extract', action="store_true",
        help="Extract a file from the MFS file, or a file from the CFG file")
    group.add_argument(
        "-a", "--add", dest='add', type=argparse.FileType('rb'),
        help="Add a file to the MFS file or a file to the CFG file",
        metavar="FILENAME")
    group.add_argument(
        "-r", "--remove", dest='remove', action="store_true",
        help="Remove a file from the MFS file, or a file from the CFG file")

    args = parser.parse_args()

    # add/remove/extract need a target: a path for CFG, an ID for MFS.
    if (args.add or args.remove
            or args.extract) and (args.cfg and args.file_path is None):
        parser.error(
            "--add/--remove/--extract on a --cfg file requires the --file-path option"
        )
    if (args.add or args.remove
            or args.extract) and (args.mfs and args.file_id is None):
        parser.error(
            "--add/--remove/--extract on a --mfs file requires the --file-id option"
        )

    if args.mfs is not None:
        # ---- MFS input ----
        data = args.mfs.read()
        mfs = MFS(data)
        if args.dump:
            with argparse.FileType("wb")(args.output) as f:
                f.write("%s" % mfs)
        elif args.extract:
            file = mfs.getSystemVolume().getFile(args.file_id)
            if file:
                with argparse.FileType("wb")(args.output) as f:
                    f.write(file.data)
            else:
                print "File ID %d does not exist in the MFS System Volume" % args.file_id
                sys.exit(-1)
        elif args.remove:
            mfs.getSystemVolume().removeFile(args.file_id)
            # Regenerate before writing mfs.data out.
            mfs.generate()
            with argparse.FileType("wb")(args.output) as f:
                f.write(mfs.data)
        elif args.add:
            # Refuse to overwrite an existing file ID.
            file = mfs.getSystemVolume().getFile(args.file_id)
            if file:
                print "File ID %d already exists in the MFS System Volume" % args.file_id
                sys.exit(-1)
            data = args.add.read()
            mfs.getSystemVolume().addFile(args.file_id, data, args.optimize)
            mfs.generate()
            with argparse.FileType("wb")(args.output) as f:
                f.write(mfs.data)
        elif args.zip:
            # One uncompressed entry "file_<id>.bin" per existing file ID.
            z = zipfile.ZipFile(args.output, "w", zipfile.ZIP_STORED)
            for id in xrange(mfs.getSystemVolume().numFiles):
                file = mfs.getSystemVolume().getFile(id)
                if file:
                    zi = zipfile.ZipInfo("file_%d.bin" % id)
                    # Octal 0644 placed in the upper 16 bits of external_attr.
                    zi.external_attr = (0644 << 16)
                    z.writestr(zi, file.data)
            z.close()
    else:
        # ---- CFG input ----
        data = args.cfg.read()
        cfg = CFG(data)
        if args.dump:
            with argparse.FileType("wb")(args.output) as f:
                f.write("%s" % cfg)
            # Round-trip check: regenerating must reproduce the input bytes.
            cfg.generate(args.alignment)
            #with argparse.FileType("wb")(args.output) as f: f.write(cfg.data)
            assert cfg.data == data
        elif args.zip:
            z = zipfile.ZipFile(args.output, "w", zipfile.ZIP_STORED)
            for file in cfg.files:
                path = file.path
                if file.isDirectory():
                    # Directory entries get a trailing separator in the archive.
                    path += posixpath.sep
                    # NOTE(review): presumably Unix directory mode 0755 in the
                    # upper 16 bits plus DOS attribute bits 0x30 -- confirm.
                    attr = (040755 << 16) | 0x30
                else:
                    attr = (0644 << 16)
                zi = zipfile.ZipInfo(path)
                zi.external_attr = attr
                z.writestr(zi, file.data)
            z.close()
        elif args.extract:
            file = cfg.getFile(args.file_path)
            if file is None:
                print "File path '%s' does not exist in the CFG file" % args.file_path
                sys.exit(-1)
            with argparse.FileType("wb")(args.output) as f:
                f.write(file.data)
        elif args.remove:
            res = cfg.removeFile(args.file_path, args.recursive)
            if not res:
                # Distinguish "not found" from "non-empty directory".
                if cfg.getFile(args.file_path) is None:
                    print "File path '%s' does not exist in the CFG file" % args.file_path
                else:
                    print "File path '%s' is a non-empty directory in the CFG file (use --recursive)" % args.file_path
                sys.exit(-1)
            cfg.generate(args.alignment)
            with argparse.FileType("wb")(args.output) as f:
                f.write(cfg.data)
        elif args.add:
            # Refuse to overwrite an existing path.
            file = cfg.getFile(args.file_path)
            if file:
                print "File path '%s' already exists in the CFG file" % args.file_path
                sys.exit(-1)
            data = args.add.read()
            mode = CFG.strToMode(args.mode)
            opt = CFG.strToOpt(args.opt)
            # Bit 0x1000 of the mode must be set exactly when the path ends
            # with '/' (the help text ties both to adding a directory).
            if args.file_path[-1] == '/':
                assert mode & 0x1000 == 0x1000
            else:
                assert mode & 0x1000 == 0
            if not cfg.addFile(args.file_path, data, mode, opt, args.uid, args.gid):
                print "Error adding file to path '%s' in the CFG file " \
                      "(parent doesn't exist or is not a directory?)" % args.file_path
                sys.exit(-1)
            cfg.generate(args.alignment)
            with argparse.FileType("wb")(args.output) as f:
                f.write(cfg.data)