def test_get_shortest_path(self):
    """Check get_shortest_path() on a small seven-state graph.

    All states are created first and then registered in order; the test
    relies on the resulting ids being '0'..'6' (see the int() conversion
    in the final assertion).
    """
    automata = Automata()
    states = [State('state%d' % index) for index in range(7)]
    for state in states:
        automata.add_state(state)

    # (source index, target index, clickable id), in insertion order.
    # NOTE(review): the '5-0' clickable is attached to a second
    # state3 -> state5 edge, not to state5 -> state0 as its name suggests.
    # Kept as-is; confirm whether this is intentional.
    links = (
        (0, 1, '0-1'),
        (0, 2, '0-2'),
        (0, 3, '0-3'),
        (2, 4, '2-4'),
        (4, 5, '4-5'),
        (3, 5, '3-5'),
        (3, 5, '5-0'),
        (5, 6, '5-6'),
    )
    for source, target, clickable_id in links:
        automata.add_edge(states[source], states[target], Clickable(clickable_id))

    # Asking for the path to state0 itself is expected to give no edges.
    self.assertEqual(automata.get_shortest_path(states[0]), [])

    # Expected route: 0-3, 3-5, 5-6 (compared by each edge's source state id).
    edges = automata.get_shortest_path(states[6])
    source_ids = [int(edge[0].get_id()) for edge in edges]
    self.assertEqual(source_ids, [0, 3, 5])
def load_automata(fname):
    """Build an Automata from a JSON description file.

    The JSON document must contain a 'state' list (each entry with 'id',
    'dom_path' and a 'clickable' list of {'id', 'xpath', 'tag'}) and an
    'edge' list (each entry with 'from', 'to' and 'clickable' ids).
    Every 'dom_path' is resolved relative to the directory of ``fname``.

    Args:
        fname: path of the JSON file to load.

    Returns:
        The populated Automata.

    Raises:
        AssertionError: if ``fname`` is not an existing file, or if an edge
            refers to a state or clickable that cannot be resolved.
    """
    # isfile() already implies that the path exists.
    assert os.path.isfile(fname), 'automata file not found: %s' % (fname,)

    # DOM files are stored relative to the JSON file; resolve the base once.
    base_dir = os.path.dirname(os.path.realpath(fname))

    automata = Automata()
    with open(fname) as f:
        data = json.load(f)

    for state in data['state']:
        with open(os.path.join(base_dir, state['dom_path']), 'r') as df:
            s = State(df.read())
        s.set_id(state['id'])
        for clickable in state['clickable']:
            c = Clickable(clickable['id'], clickable['xpath'], clickable['tag'])
            s.add_clickable(c)
        automata.add_state(s)

    for edge in data['edge']:
        from_state = automata.get_state_by_id(edge['from'])
        to_state = automata.get_state_by_id(edge['to'])
        # Validate the endpoints BEFORE dereferencing from_state, so that an
        # unknown state id is reported by the assertion instead of failing
        # with an AttributeError on the lookup result.
        # (presumably get_state_by_id returns None for unknown ids -- confirm)
        assert from_state and to_state, (
            'edge refers to unknown state: %r -> %r' % (edge['from'], edge['to']))
        clickable = from_state.get_clickable_by_id(edge['clickable'])
        assert clickable, (
            'edge refers to unknown clickable: %r' % (edge['clickable'],))
        automata.add_edge(from_state, to_state, clickable)

    return automata
def run():
    """Configure the token automaton and validate the words of an input file.

    The automaton is created with 'q1' and the list q3, q5, q7, q8..q40
    (presumably the start state and the accepting states -- confirm against
    the Automata constructor). Transitions are then registered state by
    state, the file named by sys.argv[1] is read, and everything is handed
    to get_validation().
    """
    accepting = ['q3', 'q5', 'q7', 'q8'] + ['q{}'.format(i) for i in range(9, 41)]
    automata = Automata('q1', accepting)

    # States that complete a keyword, with the label reported for them.
    keyword_tokens = {
        'q10': 'INT',
        'q14': 'FLOAT',
        'q17': 'REAL',
        'q21': 'BREAK',
        'q24': 'CHAR',
        'q27': 'CASE',
        'q32': 'DOUBLE',
        'q35': 'CONST',
        'q40': 'CONTINUE',
    }

    # Label of every listed state; q8..q40 default to IDENTIFIER unless the
    # state completes a keyword.
    primitives = {
        'q3': 'INTEGER NUMBER',
        'q5': 'REAL NUMBER',
        'q7': 'COMMENT',
    }
    for number in range(8, 41):
        name = 'q{}'.format(number)
        primitives[name] = keyword_tokens.get(name, 'IDENTIFIER')

    identifiers = ['IDENTIFIER', 'INTEGER NUMBER', 'REAL NUMBER']

    def add_identifier_fallback(state):
        # Every alphabet symbol without a transition from `state` yet leads
        # back to q8.
        unused = set(get_alphabet(state)) - set(automata.get_transitions_values(state))
        automata.add_state(state, {symbol: 'q8' for symbol in unused})

    def add_keyword_end(state):
        # After a completed keyword any digit or letter leads to q8.
        automata.add_state(state, get_numerals('q8'))
        automata.add_state(state, get_alphabet('q8'))

    # --- numbers, comments and the entry state ---
    # NOTE(review): '+' goes through q2 while '-' goes straight to q3, which
    # is in the state list passed to Automata above; confirm this is intended.
    automata.add_state('q1', {'+': 'q2', '-': 'q3'})
    automata.add_state('q1', get_numerals('q3'))
    automata.add_state('q1', {'/': 'q6'})
    automata.add_state('q1', get_alphabet('q8'))
    automata.add_state('q2', get_numerals('q3'))
    automata.add_state('q3', get_numerals('q3'))
    automata.add_state('q3', {'.': 'q4'})
    automata.add_state('q4', get_numerals('q5'))
    automata.add_state('q5', get_numerals('q5'))
    automata.add_state('q6', {'/': 'q7'})
    automata.add_state('q7', {symbol: 'q7' for symbol in '+-/. '})
    automata.add_state('q7', get_numerals('q7'))
    automata.add_state('q7', get_alphabet('q7'))

    # --- q8 and the branches that start each keyword path ---
    automata.add_state('q8', {
        'n': 'q9',
        'l': 'q11',
        'e': 'q15',
        'r': 'q18',
        'h': 'q22',
        'a': 'q25',
        'o': 'q28',
    })
    add_identifier_fallback('q8')
    automata.add_state('q8', get_numerals('q8'))

    # --- keyword paths: explicit transitions of each intermediate state ---
    keyword_steps = {
        'q9': {'t': 'q10'},
        'q11': {'o': 'q12'},
        'q12': {'a': 'q13'},
        'q13': {'t': 'q14'},
        'q15': {'a': 'q16'},
        'q16': {'l': 'q17'},
        'q18': {'e': 'q19'},
        'q19': {'a': 'q20'},
        'q20': {'k': 'q21'},
        'q22': {'a': 'q23'},
        'q23': {'r': 'q24'},
        'q25': {'s': 'q26'},
        'q26': {'e': 'q27'},
        'q28': {'u': 'q29', 'n': 'q33'},
        'q29': {'b': 'q30'},
        'q30': {'l': 'q31'},
        'q31': {'e': 'q32'},
        'q33': {'s': 'q34', 't': 'q36'},
        'q34': {'t': 'q35'},
        'q36': {'i': 'q37'},
        'q37': {'n': 'q38'},
        'q38': {'u': 'q39'},
        'q39': {'e': 'q40'},
    }
    # Registered in ascending state order (q9 .. q40).
    for number in range(9, 41):
        name = 'q{}'.format(number)
        if name in keyword_tokens:
            add_keyword_end(name)
        else:
            automata.add_state(name, keyword_steps[name])
            add_identifier_fallback(name)

    words = read_text_file(sys.argv[1])
    get_validation(automata, words, primitives, identifiers)
class B2gCrawler(Crawler):
    """Crawler that fires clickables through an executor and records the
    visited states and transitions in an Automata.

    Exploration is recursive: every clickable that produces a previously
    unseen state is followed one level deeper, up to the configured
    maximum depth.
    """

    def __init__(self, configuration, executor):
        """Store the collaborators and reset all bookkeeping."""
        self.automata = Automata()
        self.configuration = configuration
        self.executor = executor
        # stack of executed clickables (events)
        self.exe_stack = []
        self.invariant_violation = []
        # 'unexamined': candidate clickables found with rules in DomAnalyzer
        # 'true':       clickables that triggered a different screen dom
        # 'false':      clickables that did not change the dom
        self.num_clickables = dict.fromkeys(('unexamined', 'true', 'false'), 0)

    def run(self):
        """Crawl from a freshly restarted app.

        Returns:
            (automata, invariant violations sorted by state id, clickable
            counters).
        """
        self.executor.restart_app()
        initial_state = State(self.executor.get_source())
        self.automata.add_state(initial_state)
        screenshot_name = initial_state.get_id() + '.png'
        self.save_screenshot(screenshot_name, self.executor.get_screenshot(), 'state')
        self.save_dom(initial_state)
        self.crawl(1)

        def by_state_id(violation):
            return int(violation['state'])

        self.invariant_violation = sorted(self.invariant_violation, key=by_state_id)
        return self.automata, self.invariant_violation, self.num_clickables

    def crawl(self, depth, prev_state=None):
        """Fire every candidate clickable of the current state.

        Args:
            depth: current recursion depth; nothing happens beyond the
                configured maximum depth.
            prev_state: state the crawler came from, if any; its dom is
                passed to DomAnalyzer.get_clickables together with the
                current dom.
        """
        if depth > self.configuration.get_max_depth():
            return
        cs = self.automata.get_current_state()
        if self.violate_invariant(cs.get_dom(), cs.get_id()):
            return

        current_dom = cs.get_dom()
        prev_dom = prev_state.get_dom() if prev_state else None
        candidate_clickables = DomAnalyzer.get_clickables(current_dom, prev_dom)
        self.num_clickables['unexamined'] += len(candidate_clickables)

        for clickable in candidate_clickables:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = ''.join([cs.get_id(), '-', clickable.get_id(), '.png'])
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', cs.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            ft = FireEventThread(self.executor, clickable)
            ft.start()
            timeout = self.configuration.get_sleep_time() * 2
            ft.join(timeout)  # time out after sleep_time*2 seconds
            if ft.is_alive():  # timed out
                self._report_no_response_and_exit(clickable)

            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1

            new_dom = self.executor.get_source()
            if DomAnalyzer.is_equal(cs.get_dom(), new_dom):
                self.num_clickables['false'] += 1
                continue

            # The dom changed: record the transition.
            self.num_clickables['true'] += 1
            cs.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            ns, is_newly_added = self.automata.add_state(State(new_dom))
            self.automata.add_edge(cs, ns, clickable)
            if is_newly_added:
                # Only states not seen before are explored further.
                state_image = ns.get_id() + '.png'
                self.save_screenshot(state_image, self.executor.get_screenshot(), 'state')
                self.save_dom(ns)
                self.automata.change_state(ns)
                self.crawl(depth + 1, cs)
            # Return to the state being examined before the next clickable.
            self.exe_stack.pop(-1)
            self.automata.change_state(cs)
            self.backtrack(cs)

    def _report_no_response_and_exit(self, clickable):
        """Log the executed sequence and the counters, then call sys.exit()."""
        logger.error('No response while firing an event. Execution sequences:')
        # add the clickable triggering No Response
        self.exe_stack.append(clickable)
        for executed in self.exe_stack:
            logger.error(executed)
        counts = self.num_clickables
        logger.error(
            'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
            counts['unexamined'] + counts['true'] + counts['false'],
            counts['true'],
            counts['false'],
            counts['unexamined'])
        logger.error('Program terminated.')
        sys.exit()

    def backtrack(self, state):
        """Restart the app and replay the shortest path leading to ``state``."""
        logger.debug('Backtrack to state %s', state.get_id())
        edges = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        # Each edge is a (state_from, state_to, clickable, cost) tuple; only
        # the clickable is needed for the replay.
        for _state_from, _state_to, clickable, _cost in edges:
            time.sleep(self.configuration.get_sleep_time())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            self.executor.fire_event(clickable)

    def save_screenshot(self, fname, b64data, my_type):
        """Decode base64 image data and write it to the ``my_type`` directory."""
        target_dir = self.configuration.get_abs_path(my_type)
        path = os.path.join(target_dir, fname)
        # Decode before opening so a decoding error does not leave an empty file.
        imgdata = base64.b64decode(b64data)
        with open(path, 'wb') as image_file:
            image_file.write(imgdata)

    def save_dom(self, state):
        """Write the dom of ``state`` to '<state id>.txt' in the 'dom' directory."""
        dom_dir = self.configuration.get_abs_path('dom')
        dom_path = os.path.join(dom_dir, state.get_id() + '.txt')
        with open(dom_path, 'w') as dom_file:
            dom_file.write(state.get_dom())

    def violate_invariant(self, dom, statd_id):
        """Check ``dom`` against every configured invariant.

        One entry is appended to self.invariant_violation per invariant whose
        check() is truthy for ``dom``.

        Returns:
            True if at least one invariant reported a violation.
        """
        found = False
        for invariant in self.configuration.get_invariants():
            if not invariant.check(dom):
                continue
            found = True
            self.invariant_violation.append({
                'state': statd_id,
                'name': str(invariant),
                'sequence': list(self.exe_stack)  # shallow copy of clickables
            })
        return found
class B2gCrawler(Crawler):
    """Crawler that fires clickables through an executor and records the
    visited states and transitions in an Automata.

    Exploration is recursive: every clickable that produces a previously
    unseen state is followed one level deeper, up to the configured
    maximum depth.
    """

    def __init__(self, configuration, executor):
        """Store the collaborators and reset all bookkeeping."""
        self.automata = Automata()
        self.configuration = configuration
        self.executor = executor
        # stack of executed clickables (events)
        self.exe_stack = []
        self.invariant_violation = []
        # 'unexamined': candidate clickables found with rules in DomAnalyzer
        # 'true':       clickables that triggered a different screen dom
        # 'false':      clickables that did not change the dom
        self.num_clickables = dict.fromkeys(('unexamined', 'true', 'false'), 0)

    def run(self):
        """Crawl from a freshly restarted app.

        Returns:
            (automata, invariant violations sorted by state id, clickable
            counters).
        """
        self.executor.restart_app()
        initial_state = State(self.executor.get_source())
        self.automata.add_state(initial_state)
        screenshot_name = initial_state.get_id() + '.png'
        self.save_screenshot(screenshot_name, self.executor.get_screenshot(), 'state')
        self.save_dom(initial_state)
        self.crawl(1)

        def by_state_id(violation):
            return int(violation['state'])

        self.invariant_violation = sorted(self.invariant_violation, key=by_state_id)
        return self.automata, self.invariant_violation, self.num_clickables

    def crawl(self, depth, prev_state=None):
        """Fire every candidate clickable of the current state.

        Args:
            depth: current recursion depth; nothing happens beyond the
                configured maximum depth.
            prev_state: state the crawler came from, if any; its dom is
                passed to DomAnalyzer.get_clickables together with the
                current dom.
        """
        if depth > self.configuration.get_max_depth():
            return
        cs = self.automata.get_current_state()
        if self.violate_invariant(cs.get_dom(), cs.get_id()):
            return

        current_dom = cs.get_dom()
        prev_dom = prev_state.get_dom() if prev_state else None
        candidate_clickables = DomAnalyzer.get_clickables(current_dom, prev_dom)
        self.num_clickables['unexamined'] += len(candidate_clickables)

        for clickable in candidate_clickables:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = ''.join([cs.get_id(), '-', clickable.get_id(), '.png'])
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', cs.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            ft = FireEventThread(self.executor, clickable)
            ft.start()
            timeout = self.configuration.get_sleep_time() * 2
            ft.join(timeout)  # time out after sleep_time*2 seconds
            if ft.is_alive():  # timed out
                self._report_no_response_and_exit(clickable)

            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1

            new_dom = self.executor.get_source()
            if DomAnalyzer.is_equal(cs.get_dom(), new_dom):
                self.num_clickables['false'] += 1
                continue

            # The dom changed: record the transition.
            self.num_clickables['true'] += 1
            cs.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            ns, is_newly_added = self.automata.add_state(State(new_dom))
            self.automata.add_edge(cs, ns, clickable)
            if is_newly_added:
                # Only states not seen before are explored further.
                state_image = ns.get_id() + '.png'
                self.save_screenshot(state_image, self.executor.get_screenshot(), 'state')
                self.save_dom(ns)
                self.automata.change_state(ns)
                self.crawl(depth + 1, cs)
            # Return to the state being examined before the next clickable.
            self.exe_stack.pop(-1)
            self.automata.change_state(cs)
            self.backtrack(cs)

    def _report_no_response_and_exit(self, clickable):
        """Log the executed sequence and the counters, then call sys.exit()."""
        logger.error('No response while firing an event. Execution sequences:')
        # add the clickable triggering No Response
        self.exe_stack.append(clickable)
        for executed in self.exe_stack:
            logger.error(executed)
        counts = self.num_clickables
        logger.error(
            'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
            counts['unexamined'] + counts['true'] + counts['false'],
            counts['true'],
            counts['false'],
            counts['unexamined'])
        logger.error('Program terminated.')
        sys.exit()

    def backtrack(self, state):
        """Restart the app and replay the shortest path leading to ``state``."""
        logger.debug('Backtrack to state %s', state.get_id())
        edges = self.automata.get_shortest_path(state)
        self.executor.restart_app()
        # Each edge is a (state_from, state_to, clickable, cost) tuple; only
        # the clickable is needed for the replay.
        for _state_from, _state_to, clickable, _cost in edges:
            time.sleep(self.configuration.get_sleep_time())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            self.executor.fire_event(clickable)

    def save_screenshot(self, fname, b64data, my_type):
        """Decode base64 image data and write it to the ``my_type`` directory."""
        target_dir = self.configuration.get_abs_path(my_type)
        path = os.path.join(target_dir, fname)
        # Decode before opening so a decoding error does not leave an empty file.
        imgdata = base64.b64decode(b64data)
        with open(path, 'wb') as image_file:
            image_file.write(imgdata)

    def save_dom(self, state):
        """Write the dom of ``state`` to '<state id>.txt' in the 'dom' directory."""
        dom_dir = self.configuration.get_abs_path('dom')
        dom_path = os.path.join(dom_dir, state.get_id() + '.txt')
        with open(dom_path, 'w') as dom_file:
            dom_file.write(state.get_dom())

    def violate_invariant(self, dom, statd_id):
        """Check ``dom`` against every configured invariant.

        One entry is appended to self.invariant_violation per invariant whose
        check() is truthy for ``dom``.

        Returns:
            True if at least one invariant reported a violation.
        """
        found = False
        for invariant in self.configuration.get_invariants():
            if not invariant.check(dom):
                continue
            found = True
            self.invariant_violation.append({
                'state': statd_id,
                'name': str(invariant),
                'sequence': list(self.exe_stack)  # shallow copy of clickables
            })
        return found
class AutomataTestCase(unittest.TestCase):
    """Unit tests for Automata together with State, Clickable, FormField
    and InputField."""

    def setUp(self):
        """Create an automata holding one state (id '0') with one clickable.

        The form/input de-duplication behaviour is also asserted here, so a
        regression there fails every test of this class.
        """
        dom1 = (
            ' <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8">'
            ' <title></title> </head> <body>dom1</body> </html> '
        )
        state = State(dom1)
        self.automata = Automata()
        self.automata.add_state(state)
        self.assertEqual(len(self.automata.get_states()), 1)
        self.assertEqual(self.automata.get_initial_state().get_id(),
                         self.automata.get_current_state().get_id())
        self.assertEqual(self.automata.get_current_state().get_id(), '0')

        # test adding and removing inputs and forms
        form1 = FormField('form1')
        form1.add_input(InputField('username', '//*[@id="username"]', 'castman'))
        form1.add_input(InputField('password', '', 'p@ssw0rd'))
        # adding an equal input a second time must not create a new entry
        form1.add_input(InputField('password', '', 'p@ssw0rd'))
        self.assertEqual(len(form1.get_inputs()), 2)
        form1.remove_input(InputField('username', '//*[@id="username"]', 'castman'))
        self.assertEqual(len(form1.get_inputs()), 1)

        form2 = FormField('', '//*[@id="lst-ib"]')
        clickable = Clickable('', '//*[@id="btn1"]')
        clickable.add_form(form1)
        # a second FormField('form1') is expected to count as a duplicate
        clickable.add_form(FormField('form1'))
        clickable.add_form(form2)
        self.assertEqual(len(clickable.get_forms()), 2)
        clickable.remove_form(FormField('', '//*[@id="lst-ib"]'))
        self.assertEqual(len(clickable.get_forms()), 1)

        # add the clickable into state 0
        self.automata.get_current_state().add_clickable(clickable)

    def test_automata(self):
        """Exercise state de-duplication, clickables, edges and state changes."""
        dom1 = self.automata.get_current_state().get_dom()
        dom1 += '<custom></custom>'
        dom2 = dom1
        state1 = State(dom1)
        state2 = State(dom2)
        self.automata.add_state(state1)
        # state2 has the same dom as state1, so no new state may be added
        state3, is_newly_added = self.automata.add_state(state2)
        self.assertTrue(state3 == state1)
        self.assertFalse(is_newly_added)
        self.assertEqual(len(self.automata.get_states()), 2)

        clickable = self.automata.get_current_state().get_clickables()[0]
        clickable2 = Clickable('', '//html/body/button[3]')
        self.assertEqual(len(self.automata.get_current_state().get_clickables()), 1)
        # re-adding known clickables must not create duplicates
        self.automata.get_current_state().add_clickable(clickable)
        self.automata.get_current_state().add_clickable(clickable2)
        self.automata.get_current_state().add_clickable(clickable2)
        self.assertEqual(len(self.automata.get_current_state().get_clickables()), 2)

        self.automata.add_edge(self.automata.get_current_state(), state1,
                               self.automata.get_current_state().get_clickables()[0])
        self.assertEqual(len(self.automata.get_edges()), 1)

        state1.add_prev_state(self.automata.get_current_state())
        self.assertEqual(self.automata.get_current_state().get_id(), '0')
        self.automata.change_state(state1)
        self.assertEqual(self.automata.get_initial_state().get_id(), '0')
        self.assertEqual(self.automata.get_current_state().get_id(), '1')
        self.assertEqual(
            self.automata.get_current_state().get_prev_states()[0].get_id(), '0')

        # Debugging aid (Python 2 print statements), kept for reference:
        # for s in self.automata.get_states():
        #     print s
        #     for c in s.get_clickables():
        #         print c
        #         for f in c.get_forms():
        #             print f
        #             for _i in f.get_inputs():
        #                 print _i
        # for (state_from, state_to, clickable, cost) in self.automata.get_edges():
        #     print state_from, state_to, clickable, cost

    def test_get_shortest_path(self):
        """Check get_shortest_path() on a small seven-state graph."""
        automata = Automata()
        state0 = State('state0')
        state1 = State('state1')
        state2 = State('state2')
        state3 = State('state3')
        state4 = State('state4')
        state5 = State('state5')
        state6 = State('state6')
        automata.add_state(state0)
        automata.add_state(state1)
        automata.add_state(state2)
        automata.add_state(state3)
        automata.add_state(state4)
        automata.add_state(state5)
        automata.add_state(state6)
        automata.add_edge(state0, state1, Clickable('0-1'))
        automata.add_edge(state0, state2, Clickable('0-2'))
        automata.add_edge(state0, state3, Clickable('0-3'))
        automata.add_edge(state2, state4, Clickable('2-4'))
        automata.add_edge(state4, state5, Clickable('4-5'))
        automata.add_edge(state3, state5, Clickable('3-5'))
        # NOTE(review): the '5-0' clickable is attached to a second
        # state3 -> state5 edge, not to state5 -> state0 as its name
        # suggests. Kept as-is; confirm whether this is intentional.
        automata.add_edge(state3, state5, Clickable('5-0'))
        automata.add_edge(state5, state6, Clickable('5-6'))
        self.assertEqual(automata.get_shortest_path(state0), [])
        edges = automata.get_shortest_path(state6)
        # expected route: 0-3, 3-5, 5-6 (compared by each edge's source id)
        self.assertEqual([int(e[0].get_id()) for e in edges], [0, 3, 5])
        # for e in edges:
        #     print e[0].get_id(), e[1].get_id(), e[2].get_id()

    def test_load_save(self):
        """Load the example automata, save it again and compare both files."""
        import filecmp

        automata = Automata(fname='test_data/automata-example.json')
        config = B2gConfiguration('test-app-name', 'test-app-id', mkdir=False)
        config.set_path('root', 'test_data')
        config.set_path('dom', 'test_data/dom')
        config.set_path('state', 'test_data/screenshot/state')
        config.set_path('clickable', 'test_data/screenshot/clickable')
        saved_file_path = automata.save(config)
        try:
            self.assertTrue(filecmp.cmp('test_data/automata-example.json',
                                        'test_data/automata.json'))
        finally:
            # Remove the generated file even when the comparison fails, so a
            # failing run does not leave stale output in test_data.
            # Removal itself stays best-effort, as before.
            try:
                os.remove(saved_file_path)
            except OSError:
                pass