def test_automata(self):
    """Exercise state de-duplication, clickable bookkeeping, edges and state switching."""
    # Two State objects are built from the very same DOM string.
    dom1 = self.automata.get_current_state().get_dom()
    dom1 += '<custom></custom>'
    dom2 = dom1
    state1 = State(dom1)
    state2 = State(dom2)
    self.automata.add_state(state1)
    # Adding the second, identical state must return the first one and
    # report that nothing new was added.
    state3, is_newly_added = self.automata.add_state(state2)
    self.assertTrue(state3 == state1)
    self.assertFalse(is_newly_added)
    self.assertEqual(len(self.automata.get_states()), 2)

    clickable = self.automata.get_current_state().get_clickables()[0]
    clickable2 = Clickable('', '//html/body/button[3]')
    self.assertEqual(len(self.automata.get_current_state().get_clickables()), 1)
    # Re-adding the existing clickable and adding the new one twice must
    # leave exactly two clickables on the state.
    self.automata.get_current_state().add_clickable(clickable)
    self.automata.get_current_state().add_clickable(clickable2)
    self.automata.get_current_state().add_clickable(clickable2)
    self.assertEqual(len(self.automata.get_current_state().get_clickables()), 2)

    # One edge from the current state to state1 through the first clickable.
    self.automata.add_edge(self.automata.get_current_state(), state1,
                           self.automata.get_current_state().get_clickables()[0])
    self.assertEqual(len(self.automata.get_edges()), 1)

    state1.add_prev_state(self.automata.get_current_state())

    # Switching to state1 changes the current state but not the initial one.
    self.assertEqual(self.automata.get_current_state().get_id(), '0')
    self.automata.change_state(state1)
    self.assertEqual(self.automata.get_initial_state().get_id(), '0')
    self.assertEqual(self.automata.get_current_state().get_id(), '1')
    self.assertEqual(self.automata.get_current_state().get_prev_states()[0].get_id(), '0')

    # NOTE(review): the triple quote below opens a string literal whose
    # closing quote is not part of this block - confirm where it ends.
    '''
def product(self, other, mode='intersection'):
    """Build the product automaton of this DFA and ``other``.

    Every pair ``(p, q)`` with ``p`` in ``self.Q`` and ``q`` in ``other.Q``
    becomes one state named ``"(p, q)"``. The pair made of both start states
    is flagged as the starting state. With ``mode='intersection'`` a pair is
    final when both components are final; for any other ``mode`` value it is
    final when at least one component is. Transitions are generated for the
    symbols of ``self.Sigma`` only.

    Returns:
        The DFA produced by ``load_dfa_from_dict`` from the assembled table.
    """

    def pair_name(left, right):
        # Plain display name of a state pair.
        return "(%s, %s)" % (str(left), str(right))

    def pair_key(left, right):
        # Table key of a pair: its name round-tripped through State.
        return str(State(pair_name(left, right)))

    pairs = list(it.product(self.Q, other.Q))
    table = {
        pair_name(left, right): {'transitions': {}}
        for left, right in pairs
    }
    need_both = mode == 'intersection'

    for left, right in pairs:
        if left == self.q_0 and right == other.q_0:
            table[pair_key(left, right)]['starting_state'] = True

        if need_both:
            is_final = left in self.F and right in other.F
        else:
            is_final = left in self.F or right in other.F
        table[pair_key(left, right)]['final_state'] = is_final

        for symbol in self.Sigma:
            target = pair_key(self.delta(left, symbol),
                              other.delta(right, symbol))
            table[pair_key(left, right)]['transitions'][symbol] = target

    return load_dfa_from_dict(table)
def run(self):
    """Restart the application, register its first state and crawl from it.

    The initial state is built from the executor's page source, added to the
    automata, and saved as a screenshot and a DOM dump before crawling starts
    at depth 1. Recorded invariant violations are then ordered by the integer
    value of their ``'state'`` entry.

    Returns:
        A tuple ``(automata, invariant_violation, num_clickables)``.
    """

    def by_state_number(violation):
        return int(violation['state'])

    self.executor.restart_app()
    first_state = State(self.executor.get_source())
    self.automata.add_state(first_state)

    shot_name = first_state.get_id() + '.png'
    self.save_screenshot(shot_name, self.executor.get_screenshot(), 'state')
    self.save_dom(first_state)

    self.crawl(1)

    self.invariant_violation = sorted(self.invariant_violation,
                                      key=by_state_number)
    return self.automata, self.invariant_violation, self.num_clickables
def setUp(self):
    """Create an automata with one state and attach a clickable to it.

    Along the way this checks that forms ignore duplicate inputs, that
    clickables ignore duplicate forms, and that both support removal.
    """
    dom1 = ''' <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title></title> </head> <body>dom1</body> </html> '''
    first_state = State(dom1)
    machine = Automata()
    self.automata = machine
    machine.add_state(first_state)

    self.assertEqual(len(machine.get_states()), 1)
    self.assertEqual(machine.get_initial_state().get_id(),
                     machine.get_current_state().get_id())
    self.assertEqual(machine.get_current_state().get_id(), '0')

    # Inputs: the repeated password field must be counted only once.
    login_form = FormField('form1')
    input_specs = (
        ('username', '//*[@id="username"]', 'castman'),
        ('password', '', 'p@ssw0rd'),
        ('password', '', 'p@ssw0rd'),
    )
    for spec in input_specs:
        login_form.add_input(InputField(*spec))
    self.assertEqual(len(login_form.get_inputs()), 2)

    login_form.remove_input(
        InputField('username', '//*[@id="username"]', 'castman'))
    self.assertEqual(len(login_form.get_inputs()), 1)

    # Forms: a second 'form1' must not be added again.
    search_form = FormField('', '//*[@id="lst-ib"]')
    button = Clickable('', '//*[@id="btn1"]')
    button.add_form(login_form)
    button.add_form(FormField('form1'))
    button.add_form(search_form)
    self.assertEqual(len(button.get_forms()), 2)

    button.remove_form(FormField('', '//*[@id="lst-ib"]'))
    self.assertEqual(len(button.get_forms()), 1)

    # State 0 ends up owning the clickable.
    machine.get_current_state().add_clickable(button)
def test_get_shortest_path(self):
    """Check the edge path reported by ``get_shortest_path`` on a 7-state graph."""
    automata = Automata()
    nodes = [State('state%d' % index) for index in range(7)]
    for node in nodes:
        automata.add_state(node)

    # (source index, target index, clickable id), in insertion order.
    links = (
        (0, 1, '0-1'),
        (0, 2, '0-2'),
        (0, 3, '0-3'),
        (2, 4, '2-4'),
        (4, 5, '4-5'),
        (3, 5, '3-5'),
        (3, 5, '5-0'),
        (5, 6, '5-6'),
    )
    for source, target, label in links:
        automata.add_edge(nodes[source], nodes[target], Clickable(label))

    # No edge is needed to reach the first state.
    self.assertEqual(automata.get_shortest_path(nodes[0]), [])

    path = automata.get_shortest_path(nodes[6])
    # Expected route: 0-3, 3-5, 5-6
    self.assertEqual([int(step[0].get_id()) for step in path], [0, 3, 5])
def get_initail_state(self):
    """Capture the page currently shown by the executor as the initial state.

    A ``State`` is built from the executor's DOM list and URL and offered to
    the automata as its initial state. If the automata reports it as new, the
    state and its screenshot are saved; otherwise the automata is switched to
    the state it returned. The method then sleeps for the configured time.

    Returns:
        The state returned by ``self.automata.set_initial_state``.
    """
    logging.info(' get initial state')
    dom_list, url = self.executor.get_dom_list(self.configuration)
    candidate = State(dom_list, url)
    is_new, state = self.automata.set_initial_state(candidate)

    if not is_new:
        self.automata.change_state(state)
    else:
        self.automata.save_state(self.executor, candidate, 0)
        self.automata.save_state_shot(self.executor, candidate)

    pause = self.configuration.get_sleep_time()
    time.sleep(pause)
    return state
def load_dfa_from_dict(dfa_obj) -> DFA:
    """Build a ``DFA`` from a dictionary description.

    ``dfa_obj`` maps a state name to a record holding a ``'transitions'``
    mapping (symbol -> target state name), a ``'final_state'`` flag and,
    for the start state, a ``'starting_state'`` key.

    The transition function returns ``None`` for a ``None`` state; for any
    other state it wraps the looked-up target name (``None`` when there is no
    such state or symbol) in ``State``.
    """
    states: Set[State] = {State(name) for name in dfa_obj}

    alphabet: Set[str] = {
        symbol
        for record in dfa_obj.values()
        for symbol in record['transitions']
    }

    accepting: Set[State] = {
        State(name)
        for name, record in dfa_obj.items()
        if record['final_state']
    }

    # The mere presence of the key marks a start state; one is picked
    # from the resulting set.
    start_candidates = {
        State(name)
        for name, record in dfa_obj.items()
        if 'starting_state' in record
    }
    start: Optional[State] = start_candidates.pop()

    def transition(state, entry):
        if state is None:
            return None
        row = dfa_obj.get(str(state), {'transitions': {}})['transitions']
        return State(row.get(entry, None))

    return DFA(states, alphabet, transition, start, accepting)
def run_mutant_script(self, prev_state, mutation_trace=None):
    """Replay the configured mutation trace and record the states it visits.

    Each edge of ``self.configuration.get_mutation_trace()`` is copied,
    re-rooted at the state reached so far, optionally mutated with the
    matching entry of ``mutation_trace``, and fired on the executor. When the
    DOM changes, the resulting state and edge are registered in the automata.

    Args:
        prev_state: state the replay starts from.
        mutation_trace: per-edge mutation values, indexed by edge position;
            when falsy the trace is replayed unmodified.

    Side effects:
        Appends ``(edge_trace, state_trace, cluster_value)`` to
        ``self.mutation_history`` and logs all cluster values recorded so far.
    """
    depth = 0
    edge_trace = []
    state_trace = [prev_state]
    # The cluster value concatenates the ids of the visited states; an
    # unmutated run is marked with a leading "-1".
    cluster_value = (prev_state.get_id()
                     if mutation_trace else "-1" + prev_state.get_id())

    for edge in self.configuration.get_mutation_trace():
        new_edge = edge.get_copy()
        new_edge.set_state_from(prev_state.get_id())
        if mutation_trace:
            self.make_mutant_value(new_edge, mutation_trace[depth])
        self.executor.click_event_by_edge(new_edge)
        self.event_history.append(new_edge)

        dom_list, url, is_same = self.is_same_state_dom(prev_state)
        # A click that leaves the DOM untouched keeps us in prev_state.
        # Without this default the bookkeeping below would read an unbound
        # name when the very first click does not change the DOM.
        new_state = prev_state
        if not is_same:
            logging.info(' change dom to: %s', url)
            # check if this is a new state
            temp_state = State(dom_list, url)
            new_state, is_newly_added = self.automata.add_state(temp_state)
            self.automata.add_edge(new_edge, new_state.get_id())
            # save this click edge
            prev_state.add_clickable(edge.get_clickable(),
                                     new_edge.get_iframe_list())
            if is_newly_added:
                logging.info(' add new state %s of: %s',
                             new_state.get_id(), url)
                self.automata.save_state(new_state, depth + 1)
                self.automata.save_state_shot(self.executor, new_state)
            self.automata.change_state(new_state)

        # save the state, edge
        state_trace.append(new_state)
        edge_trace.append(new_edge)
        cluster_value += new_state.get_id()
        # prepare for next edge
        prev_state = new_state
        depth += 1

    self.mutation_history.append((edge_trace, state_trace, cluster_value))
    logging.warning([c for _, _, c in self.mutation_history])
def run_script_before_crawl(self, prev_state):
    """Fire the configured pre-crawl script and register the states it reaches.

    Every edge of ``self.configuration.get_before_script()`` is clicked on
    the executor and appended to ``self.event_history``. Clicks that leave
    the DOM unchanged are skipped; otherwise the new DOM is added to the
    automata as a state (saved to disk when newly added), an edge to it is
    recorded, and the automata is switched to it.

    Args:
        prev_state: state the script starts from.
    """
    for edge in self.configuration.get_before_script():
        self.executor.click_event_by_edge(edge)
        self.event_history.append(edge)

        dom_list, url, is_same = self.is_same_state_dom(prev_state)
        if is_same:
            continue

        # The message needs a %s placeholder: passing an argument without
        # one makes logging report a formatting error and drop the URL.
        logging.info(' change dom to: %s', self.executor.get_url())
        # check if this is a new state
        temp_state = State(dom_list, url)
        new_state, is_newly_added = self.automata.add_state(temp_state)
        self.automata.add_edge(edge, new_state.get_id())
        # save this click edge
        prev_state.add_clickable(edge.get_clickable(),
                                 edge.get_iframe_list())
        if is_newly_added:
            logging.info(' add new state %s of: %s', new_state.get_id(), url)
            self.automata.save_state(new_state, 0)
            self.automata.save_state_shot(self.executor, new_state)
        self.automata.change_state(new_state)
        prev_state = new_state
def update_states(self, current_state, new_edge, action, depth):
    """Classify the outcome of the last fired action and notify the algorithm.

    The executor's DOM is compared with ``current_state``. Depending on the
    result, one of the ``self.algorithm.update_with_*`` callbacks is invoked:
    same state, new state, already-known state, or out-of-domain URL.

    Args:
        current_state: state the action was fired from.
        new_edge: edge describing the fired action.
        action: mapping that provides at least ``'clickable'`` and
            ``'iframe_key'``.
        depth: crawl depth before the action; the callbacks for new/old
            states receive ``depth + 1``.
    """
    dom_list, url, is_same = self.is_same_state_dom(current_state)
    # NOTE(review): the nesting of the two `if` statements below was
    # reconstructed; confirm they are sequential rather than nested.
    if is_same:
        self.algorithm.update_with_same_state(current_state, new_edge,
                                              action, depth, dom_list, url)
    if self.is_same_domain(url):
        logging.info(' |depth:%s state:%s| change dom to: %s', depth,
                     current_state.get_id(), self.executor.get_url())
        # check if this is a new state
        temp_state = State(dom_list, url)
        new_state, is_newly_added = self.automata.add_state(temp_state)
        self.automata.add_edge(new_edge, new_state.get_id())
        # save this click edge
        current_state.add_clickable(action['clickable'],
                                    action['iframe_key'])
        self.automata.change_state(new_state)
        # depth GO ON
        depth += 1
        self.event_history.append(new_edge)
        if is_newly_added:
            self.algorithm.update_with_new_state(current_state, new_state,
                                                 new_edge, action, depth,
                                                 dom_list, url)
        else:
            self.algorithm.update_with_old_state(current_state, new_state,
                                                 new_edge, action, depth,
                                                 dom_list, url)
    else:
        # The URL left the crawled domain: no state or edge is recorded.
        self.algorithm.update_with_out_of_domain(current_state, new_edge,
                                                 action, depth, dom_list,
                                                 url)
def load_automata(fname):
    """Rebuild an ``Automata`` from the JSON description stored in ``fname``.

    The document must hold a ``'state'`` list (entries with ``'id'``,
    ``'dom_path'`` and a ``'clickable'`` list of ``'id'``/``'xpath'``/``'tag'``
    records) and an ``'edge'`` list (entries with ``'from'``, ``'to'`` and
    ``'clickable'`` ids). DOM files are resolved relative to the directory
    that contains ``fname``.

    Args:
        fname: path of the JSON file.

    Returns:
        The populated ``Automata``.

    Raises:
        AssertionError: if ``fname`` is not an existing file, or an edge
            refers to a state or clickable that was not loaded.
    """
    # Raised explicitly (rather than via `assert`) so the check survives -O
    # while callers still see the same exception type.
    if not os.path.isfile(fname):
        raise AssertionError('automata file not found: %s' % fname)

    base_dir = os.path.dirname(os.path.realpath(fname))
    with open(fname) as f:
        data = json.load(f)

    automata = Automata()
    for state in data['state']:
        with open(os.path.join(base_dir, state['dom_path']), 'r') as df:
            s = State(df.read())
        s.set_id(state['id'])
        for clickable in state['clickable']:
            s.add_clickable(
                Clickable(clickable['id'], clickable['xpath'],
                          clickable['tag']))
        automata.add_state(s)

    for edge in data['edge']:
        from_state = automata.get_state_by_id(edge['from'])
        to_state = automata.get_state_by_id(edge['to'])
        # Validate the endpoints before dereferencing from_state; otherwise a
        # missing state surfaces as an unrelated AttributeError.
        if not (from_state and to_state):
            raise AssertionError('edge refers to an unknown state: %r -> %r' %
                                 (edge['from'], edge['to']))
        clickable = from_state.get_clickable_by_id(edge['clickable'])
        if not clickable:
            raise AssertionError('edge refers to an unknown clickable: %r' %
                                 (edge['clickable'],))
        automata.add_edge(from_state, to_state, clickable)
    return automata
def executor_backtrack( self, state, *executors ):
    """Try progressively heavier strategies to bring the executors back to ``state``.

    Strategies, in order: ``refresh()`` (only when the first executor's URL
    already equals the state's URL), ``back_history()``, ``back_script()``,
    ``forward_history()`` plus a replay of the last recorded event,
    ``goto_url()`` followed by the shortest path to ``state``, and finally
    ``restart_app()`` + ``goto_url()`` followed by the shortest path. After
    each strategy ``self.is_same_state_dom(state)`` decides whether the
    target was reached.

    Args:
        state: state to return to.
        *executors: executors to drive in lockstep; at least one is required
            because the first one is consulted for its URL.

    Returns:
        ``True`` as soon as a strategy reaches ``state``; otherwise the
        outcome of the final ``is_same_state_dom`` check. ``None`` when the
        configured maximum time has already elapsed.
    """
    # Give up immediately once the time budget is spent.
    if (time.time() - self.time_start) > self.configuration.get_max_time():
        logging.info("|||| TIMO OUT |||| end backtrack ")
        return

    # Same URL: the difference is probably script-driven, so a refresh may do.
    if executors[0].get_url() == state.get_url():
        logging.info('==<BACKTRACK> : try refresh')
        for exe in executors:
            exe.refresh()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # Otherwise try going back in the history.
    logging.info('==<BACKTRACK> : try back_history ')
    for exe in executors:
        exe.back_history()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    logging.info('==<BACKTRACK> : try back_script ')
    for exe in executors:
        exe.back_script()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # Otherwise go forward again and replay the last recorded event.
    if self.event_history:
        logging.info('==<BACKTRACK> : try last edge of state history')
        for exe in executors:
            exe.forward_history()
            exe.click_event_by_edge( self.event_history[-1] )
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # Otherwise start over from the base URL and replay the shortest path.
    logging.info('==<BACKTRACK> : start form base ur')
    for exe in executors:
        exe.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    for edge in self.automata.get_shortest_path(state):
        for exe in executors:
            exe.click_event_by_edge( edge )
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # Last resort: restart the application and replay the shortest path,
    # this time verifying every intermediate state.
    logging.info('==<BACKTRACK> : retart driver')
    for exe in executors:
        exe.restart_app()
        exe.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    for edge in self.automata.get_shortest_path(state):
        for exe in executors:
            exe.click_event_by_edge( edge )
        # Check that the replayed edge really led to its recorded target.
        # A mismatch is only reported (DOM dumps + error log); the replay
        # itself is not interrupted.
        state_to = self.automata.get_state_by_id( edge.get_state_to() )
        dom_list, url, is_same = self.is_same_state_dom(state_to)
        if not is_same:
            try:
                debug_dir = os.path.join( self.configuration.get_abs_path('dom'), state.get_id(), 'debug' )
                if not os.path.isdir(debug_dir):
                    os.makedirs(debug_dir)
                err = State(dom_list, url)
                # Expected vs. observed DOM, raw and normalized.
                with codecs.open( os.path.join( debug_dir, 'debug_origin_'+state_to.get_id()+'.txt' ), 'w', encoding='utf-8' ) as f:
                    f.write(state_to.get_all_dom(self.configuration))
                with codecs.open( os.path.join( debug_dir, 'debug_restart_'+state_to.get_id()+'.txt' ), 'w', encoding='utf-8' ) as f:
                    f.write(err.get_all_dom(self.configuration))
                with codecs.open( os.path.join( debug_dir, 'debug_origin_nor_'+state_to.get_id()+'.txt' ), 'w', encoding='utf-8' ) as f:
                    f.write( state_to.get_all_normalize_dom(self.configuration) )
                with codecs.open( os.path.join( debug_dir, 'debug_restart_nor_'+state_to.get_id()+'.txt' ), 'w', encoding='utf-8' ) as f:
                    f.write( err.get_all_normalize_dom(self.configuration) )
                logging.error('==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()', state_to.get_id() )
            except Exception as e:
                # Writing the debug dumps is best effort.
                logging.info('==<BACKTRACK> save diff dom : %s', str(e))

    dom_list, url, is_same = self.is_same_state_dom(state)
    return is_same
def executor_backtrack(self, state, *executors):
    """Drive every executor back to ``state``, escalating until one strategy works.

    The strategies are tried in this order, each followed by a
    ``self.is_same_state_dom(state)`` check:

    1. ``refresh()`` - only if the first executor's URL equals the state's URL;
    2. ``back_history()``;
    3. ``back_script()``;
    4. ``forward_history()`` and a replay of the last entry of
       ``self.event_history`` - only if that history is not empty;
    5. ``goto_url()`` and a replay of the shortest path to ``state``;
    6. ``restart_app()``, ``goto_url()`` and a verified replay of the
       shortest path.

    Args:
        state: state to return to.
        *executors: executors to drive together; the first one is indexed
            directly, so at least one must be given.

    Returns:
        ``True`` when a strategy succeeds, else the result of the final
        ``is_same_state_dom`` check; ``None`` if the maximum run time from
        the configuration has been exceeded.
    """
    # Time budget exhausted: do nothing.
    if (time.time() - self.time_start) > self.configuration.get_max_time():
        logging.info("|||| TIMO OUT |||| end backtrack ")
        return

    # 1. URL unchanged, so the state change was likely done by script.
    if executors[0].get_url() == state.get_url():
        logging.info('==<BACKTRACK> : try refresh')
        for exe in executors:
            exe.refresh()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # 2. Step back in the history.
    logging.info('==<BACKTRACK> : try back_history ')
    for exe in executors:
        exe.back_history()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # 3. Back script.
    logging.info('==<BACKTRACK> : try back_script ')
    for exe in executors:
        exe.back_script()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # 4. Undo the step back and fire the most recent event again.
    if self.event_history:
        logging.info('==<BACKTRACK> : try last edge of state history')
        for exe in executors:
            exe.forward_history()
            exe.click_event_by_edge(self.event_history[-1])
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # 5. Reload the base URL and walk the shortest known path.
    logging.info('==<BACKTRACK> : start form base ur')
    for exe in executors:
        exe.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    for edge in self.automata.get_shortest_path(state):
        for exe in executors:
            exe.click_event_by_edge(edge)
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # 6. Restart the application and walk the path again, checking each hop.
    logging.info('==<BACKTRACK> : retart driver')
    for exe in executors:
        exe.restart_app()
        exe.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    for edge in self.automata.get_shortest_path(state):
        for exe in executors:
            exe.click_event_by_edge(edge)
        # Compare against the edge's recorded target state. On mismatch the
        # expected and observed DOMs are dumped for inspection; the loop
        # keeps going regardless.
        state_to = self.automata.get_state_by_id(edge.get_state_to())
        dom_list, url, is_same = self.is_same_state_dom(state_to)
        if not is_same:
            try:
                debug_dir = os.path.join(
                    self.configuration.get_abs_path('dom'), state.get_id(),
                    'debug')
                if not os.path.isdir(debug_dir):
                    os.makedirs(debug_dir)
                err = State(dom_list, url)
                # "origin" = recorded state, "restart" = what was observed;
                # the "_nor" files hold the normalized DOMs.
                with codecs.open(os.path.join(
                        debug_dir,
                        'debug_origin_' + state_to.get_id() + '.txt'),
                                 'w',
                                 encoding='utf-8') as f:
                    f.write(state_to.get_all_dom(self.configuration))
                with codecs.open(os.path.join(
                        debug_dir,
                        'debug_restart_' + state_to.get_id() + '.txt'),
                                 'w',
                                 encoding='utf-8') as f:
                    f.write(err.get_all_dom(self.configuration))
                with codecs.open(os.path.join(
                        debug_dir,
                        'debug_origin_nor_' + state_to.get_id() + '.txt'),
                                 'w',
                                 encoding='utf-8') as f:
                    f.write(
                        state_to.get_all_normalize_dom(self.configuration))
                with codecs.open(os.path.join(
                        debug_dir,
                        'debug_restart_nor_' + state_to.get_id() + '.txt'),
                                 'w',
                                 encoding='utf-8') as f:
                    f.write(err.get_all_normalize_dom(self.configuration))
                logging.error(
                    '==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()',
                    state_to.get_id())
            except Exception as e:
                # Failing to write the dumps must not abort the backtrack.
                logging.info('==<BACKTRACK> save diff dom : %s', str(e))

    dom_list, url, is_same = self.is_same_state_dom(state)
    return is_same
def both_executors_backtrack(self, state, other_executor=None):
    """Bring ``self.executor`` and ``other_executor`` back to ``state`` together.

    Escalates through: refresh (when both executors already show the state's
    URL), ``back_history()``, ``forward_history()`` plus a replay of the last
    recorded event, ``goto_url()`` plus the shortest path, and finally
    ``restart_app()`` + ``goto_url()`` plus a verified replay of the shortest
    path. ``self.is_same_state_dom(state)`` is checked after each step.

    Args:
        state: state to return to.
        other_executor: second executor driven alongside ``self.executor``.
            NOTE(review): it is dereferenced unconditionally, so the ``None``
            default cannot actually be used - confirm with callers.

    Returns:
        ``True`` as soon as ``state`` is reached, else the result of the
        final ``is_same_state_dom`` check; ``None`` when the configured
        maximum time has already elapsed.
    """
    # check if depth over max depth , time over max time
    logging.info("both exe backtrack")
    print("both exe backtrack")
    if (time.time() - self.time_start) > self.configuration.get_max_time():
        logging.info("|||| TIMO OUT |||| end backtrack ")
        return

    # Both executors still on the state's URL: a refresh may be enough.
    if self.executor.get_url() == state.get_url() and other_executor.get_url() == state.get_url():
        logging.info('==<BACKTRACK> : try refresh')
        self.executor.refresh()
        other_executor.refresh()
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # Step back in the history. The second executor only steps back if it
    # is still on the URL the first executor had before going back.
    logging.info('==<BACKTRACK> : both exe try back_history ')
    before_url = self.executor.get_url()
    self.executor.back_history()
    print('exe back')
    dom_list, after_url, is_same = self.is_same_state_dom(state)
    # NOTE(review): nesting of the print below was reconstructed - confirm
    # it belongs inside this `if`.
    if before_url == other_executor.get_url():
        other_executor.back_history()
        print('other exe back')
    if is_same:
        return True

    # Go forward again and replay the last recorded event on both.
    if self.event_history:
        logging.info('==<BACKTRACK> : try last edge of state history')
        self.executor.forward_history()
        other_executor.forward_history()
        self.executor.click_event_by_edge( self.event_history[-1] )
        other_executor.click_event_by_edge( self.event_history[-1] )
        dom_list, url, is_same = self.is_same_state_dom(state)
        if is_same:
            return True

    # Start over from the base URL and replay the shortest path.
    logging.info('==<BACKTRACK> : start form base ur')
    self.executor.goto_url()
    other_executor.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    edges = self.automata.get_shortest_path(state)
    for edge in edges:
        self.executor.click_event_by_edge( edge )
        other_executor.click_event_by_edge( edge )
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True

    # Last resort: restart both applications and replay the path, checking
    # every intermediate state.
    logging.info('==<BACKTRACK> : restart driver')
    edges = self.automata.get_shortest_path(state)
    self.executor.restart_app()
    self.executor.goto_url()
    other_executor.restart_app()
    other_executor.goto_url()
    dom_list, url, is_same = self.is_same_state_dom(state)
    if is_same:
        return True
    for edge in edges:
        self.executor.click_event_by_edge(edge)
        other_executor.click_event_by_edge(edge)
        # Verify the hop against the edge's recorded target; on mismatch
        # dump expected/observed DOMs and keep going.
        state_to = self.automata.get_state_by_id( edge.get_state_to() )
        dom_list, url, is_same = self.is_same_state_dom(state_to)
        if not is_same:
            try:
                err = State(dom_list, url)
                # NOTE(review): the relative 'debug/' directory is not
                # created here; if it is missing, open() fails and only the
                # info message in the except branch is logged.
                with open('debug/debug_origin_'+state_to.get_id()+'.txt', 'w') as f:
                    f.write(state_to.get_all_dom(self.configuration))
                with open('debug/debug_restart_'+state_to.get_id()+'.txt', 'w') as f:
                    f.write(err.get_all_dom(self.configuration))
                with open('debug/debug_origin_nor_'+state_to.get_id()+'.txt', 'w') as f:
                    f.write( state_to.get_all_normalize_dom(self.configuration) )
                with open('debug/debug_restart_nor_'+state_to.get_id()+'.txt', 'w') as f:
                    f.write( err.get_all_normalize_dom(self.configuration) )
                logging.error('==<BACKTRACK> cannot traceback to %s \t\t__from crawler.py backtrack()', state_to.get_id() )
            except Exception as e:
                logging.info('==<BACKTRACK> save diff dom : %s', str(e))

    dom_list, url, is_same = self.is_same_state_dom(state)
    return is_same
def crawl(self, depth, prev_state=None):
    """Depth-first exploration from the automata's current state.

    For every candidate clickable of the current state the event is fired in
    a ``FireEventThread``. If the DOM changes, the clickable, the resulting
    state and the connecting edge are recorded, and newly discovered states
    are explored recursively at ``depth + 1``. Counters in
    ``self.num_clickables`` track ``'unexamined'``, ``'true'`` (DOM changed)
    and ``'false'`` (DOM unchanged) clickables.

    Nothing happens when ``depth`` exceeds the configured maximum depth or
    when the current state violates an invariant. The process exits via
    ``sys.exit()`` if an event does not finish within twice the configured
    sleep time.

    Args:
        depth: depth of the current state.
        prev_state: state the crawl came from; its DOM, when given, is passed
            to ``DomAnalyzer.get_clickables`` alongside the current DOM.
    """
    # NOTE(review): block nesting below was reconstructed - confirm the
    # extent of the `else` and `if is_newly_added` branches.
    if depth <= self.configuration.get_max_depth():
        cs = self.automata.get_current_state()
        if not self.violate_invariant(cs.get_dom(), cs.get_id()):
            candidate_clickables = DomAnalyzer.get_clickables(
                cs.get_dom(),
                prev_state.get_dom() if prev_state else None)
            self.num_clickables['unexamined'] += len(candidate_clickables)
            for clickable in candidate_clickables:
                # prefetch image of the clickable
                time.sleep(0.2)  # time for correctly fetching image
                img_name = cs.get_id() + '-' + clickable.get_id() + '.png'
                img_data = self.executor.get_screenshot(clickable)

                # fire the clickable
                logger.debug('Fire event in state %s', cs.get_id())
                self.executor.empty_form(clickable)
                self.executor.fill_form(clickable)
                ft = FireEventThread(self.executor, clickable)
                ft.start()
                ft.join(self.configuration.get_sleep_time() *
                        2)  # time out after sleep_time*2 seconds
                if ft.is_alive():  # timed out
                    # Report the click sequence that led to the hang, print
                    # the clickable statistics and terminate the program.
                    logger.error(
                        'No response while firing an event. Execution sequences:'
                    )
                    self.exe_stack.append(
                        clickable
                    )  # add the clickable triggering No Response
                    for c in self.exe_stack:
                        logger.error(c)
                    logger.error(
                        'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
                        self.num_clickables['unexamined'] +
                        self.num_clickables['true'] +
                        self.num_clickables['false'],
                        self.num_clickables['true'],
                        self.num_clickables['false'],
                        self.num_clickables['unexamined'])
                    logger.error('Program terminated.')
                    sys.exit()
                time.sleep(self.configuration.get_sleep_time())
                self.num_clickables['unexamined'] -= 1

                new_dom = self.executor.get_source()
                if DomAnalyzer.is_equal(cs.get_dom(), new_dom):
                    # The click had no visible effect.
                    self.num_clickables['false'] += 1
                else:
                    self.num_clickables['true'] += 1
                    cs.add_clickable(clickable)
                    self.exe_stack.append(clickable)
                    self.save_screenshot(img_name, img_data, 'clickable')
                    ns, is_newly_added = self.automata.add_state(
                        State(new_dom))
                    self.automata.add_edge(cs, ns, clickable)
                    if is_newly_added:
                        # Persist the new state, then explore it.
                        self.save_screenshot(
                            ns.get_id() + '.png',
                            self.executor.get_screenshot(), 'state')
                        self.save_dom(ns)
                        self.automata.change_state(ns)
                        self.crawl(depth + 1, cs)
                    # Undo the click: pop it from the execution stack and
                    # return both the automata and the executor to cs.
                    self.exe_stack.pop(-1)
                    self.automata.change_state(cs)
                    self.backtrack(cs)
lives = 0 #NoExtinct = [] #Extinct = [] for i in range( 1, 2**(n * n)): # Loop through all possible states except the zero state. """ if(i in NoExtinct): lives += 1 continue if(i in Extinct): continue """ state = State(n) state.stateFromInt(i) states = [i] while (True): state.nextGen() stateNum = state.state2Int() if (stateNum == 0): #Extinct = Extinct + states #Extinct = list(set(Extinct)) break elif (stateNum in states): lives += 1 #NoExtinct = NoExtinct + states #NoExtinct = list(set(NoExtinct)) break