def debugTestMain(folderpath, dirname):
    """Run one debug crawl against the demo page and persist every artifact.

    Configures a Firefox-driven crawl of the demo URL, executes the
    monkey-crawling algorithm once, then saves the traces, the automata
    JSON, an HTML visualization, and the configuration itself.

    Args:
        folderpath: root folder under which the crawl output is written.
        dirname: sub-directory name for this particular run.
    """
    # --- configuration -------------------------------------------------
    logging.info(" setting config...")
    config = SeleniumConfiguration(
        Browser.FireFox,
        "http://140.112.42.145:2000/demo/nothing/main.html")
    config.set_max_depth(1)
    config.set_max_length(5)
    config.set_trace_amount(1)
    config.set_max_states(100)
    config.set_folderpath(folderpath)
    config.set_dirname(dirname)
    config.set_automata_fname('automata.json')
    config.set_traces_fname('traces.json')
    config.set_frame_tags(['iframe'])
    config.set_dom_inside_iframe(True)
    config.set_simple_clickable_tags()
    config.set_simple_inputs_tags()
    config.set_simple_normalizers()

    # --- executor / crawler wiring ------------------------------------
    logging.info(" setting executor...")
    executor = SeleniumExecutor(config.get_browserID(), config.get_url())

    logging.info(" setting crawler...")
    automata = Automata(config)
    databank = InlineDataBank("140.112.42.145:2000", "jeff", "zj4bj3jo37788", "test")
    algorithm = MonkeyCrawler()  # alternative: DFScrawler()
    crawler = SeleniumCrawler(config, executor, automata, databank, algorithm)

    # --- run and persist ----------------------------------------------
    logging.info(" crawler start run...")
    crawler.run_algorithm()

    logging.info(" end! save automata...")
    algorithm.save_traces()
    automata.save_automata(config.get_automata_fname())
    Visualizer.generate_html(
        'web',
        os.path.join(config.get_path('root'), config.get_automata_fname()))
    config.save_config('config.json')
def CBTestMain(folderpath, dirname, web_submit_id):
    """Run a Cross-Browser Testing (CBT) crawl for one web-submit record.

    Loads the submitted job (url, depth, browsers) from MySQL, crawls the
    target site with the basic browser while comparing against the other
    browser, then saves traces, the automata JSON, an HTML visualization,
    and the configuration.

    Args:
        folderpath: root folder under which the crawl output is written.
        dirname: sub-directory name for this particular run.
        web_submit_id: primary key of the submitted job in the databank.
    """
    logging.info(" Type: Cross Browser Testing")
    logging.info(" connect to mysql")
    print("")
    print("connect to sql")
    databank = MysqlDataBank("localhost", "root", "", "test")
    # NOTE: renamed the unpacked `time` field to `_time` so it no longer
    # shadows the stdlib `time` module (the value is unused here).
    url, deep, _time, b1, b2 = databank.get_websubmit(int(web_submit_id))
    basic_browserID = str(b1)
    other_browserID = str(b2)
    depth = int(deep)

    # BUGFIX: log messages previously read "begings" / "runing".
    logging.info(" A new CBT begins...")
    logging.info(" setting config...")
    config = SeleniumConfiguration(int(basic_browserID), int(other_browserID), url)
    config.set_max_depth(int(depth))   # max 3
    config.set_max_length(int(depth))  # max 3
    config.set_trace_amount(1)         # should be 1
    config.set_max_states(5)           # should be 100; currently unused?
    config.set_folderpath(folderpath)
    config.set_dirname(dirname)
    config.set_automata_fname('automata.json')
    config.set_traces_fname('traces.json')
    config.set_dom_inside_iframe(True)
    config.set_simple_clickable_tags()
    config.set_simple_inputs_tags()
    config.set_simple_normalizers()

    logging.info(" setting executor...")
    executor = CBTExecutor(config.get_browserID(), config.get_url())

    logging.info(" setting crawler...")
    automata = Automata(config)
    databank = InlineDataBank("localhost", "B00901138", "R124249166", "test")

    print('start Cross Browser Testing...')
    # actually it's the CBT algorithm
    algorithm = CBTCrawler(int(other_browserID), url)
    crawler = SeleniumCrawler(config, executor, automata, databank, algorithm)

    logging.info(" crawler start running...")
    crawler.run_algorithm()

    print(" end! save automata...")
    logging.info(" end! save automata...")
    algorithm.save_traces()
    automata.save_automata(config.get_automata_fname())
    Visualizer.generate_html(
        'web',
        os.path.join(config.get_path('root'), config.get_automata_fname()))
    config.save_config('config.json')
def get_clickables(cls, dom, prev_dom=None):
    """Return only newly-discovered clickables (and attached forms) in *dom*.

    Elements already present in *prev_dom* (clickables matching the
    registered clickable tags, and <form> elements) are skipped, so the
    result contains only clickables introduced since the previous DOM.

    Args:
        dom: HTML of the current page state.
        prev_dom: HTML of the previous page state, or None on first visit.

    Returns:
        list of Clickable objects; clickables found inside a new <form>
        carry a FormField populated with that form's inputs.
    """
    # Collect everything already known from the previous DOM.
    prev_clickables = []
    prev_forms = []
    if prev_dom:
        prev_soup = BeautifulSoup(prev_dom, 'html.parser')
        for tag in cls._clickable_tags:
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    prev_clickables += prev_soup.find_all(
                        tag.get_name(), attrs={attr: value})
            else:
                prev_clickables += prev_soup.find_all(tag.get_name())
        prev_forms = prev_soup.find_all('form')

    soup = BeautifulSoup(dom, 'html.parser')
    forms = soup.find_all('form')
    clickables = []

    # clickables with forms and inputs attached
    for form in forms:
        if form in prev_forms:
            continue
        form_id = form.get('id')
        if not form_id:
            form_id = cls.serial_prefix + str(cls._serial_num)
            cls._serial_num += 1
        f = FormField(form_id, cls._get_xpath(form))
        for input_type in cls.input_types:
            for my_input in form.find_all('input', attrs={'type': input_type}):
                data_set = InlineDataBank.get_data(input_type)
                if data_set:
                    value = random.choice(list(data_set))
                else:
                    # BUGFIX: string.lowercase / xrange are Python-2-only;
                    # ascii_lowercase / range behave identically and also
                    # work on Python 3.
                    value = ''.join(
                        random.choice(string.ascii_lowercase) for _ in range(8))
                input_id = my_input.get('id')
                if not input_id:
                    input_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                f.add_input(InputField(input_id, cls._get_xpath(my_input),
                                       input_type, value))
        for tag in cls._clickable_tags:
            # BUGFIX: accumulate matches over every attr/value pair instead
            # of keeping only the last pair's results (now consistent with
            # the prev_dom scan above).
            candidate_clickables = []
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    candidate_clickables += form.find_all(
                        tag.get_name(), attrs={attr: value})
            else:
                candidate_clickables = form.find_all(tag.get_name())
            for candidate_clickable in candidate_clickables:
                if candidate_clickable in prev_clickables:
                    continue
                clickable_id = candidate_clickable.get('id')
                if not clickable_id:
                    clickable_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                c = Clickable(clickable_id,
                              cls._get_xpath(candidate_clickable),
                              tag.get_name())
                c.add_form(f)
                clickables.append(c)

    # other clickables (not inside any form)
    for tag in cls._clickable_tags:
        # BUGFIX: same accumulation fix as the form scan above.
        candidate_clickables = []
        if tag.get_attr():
            for attr, value in tag.get_attr().items():
                candidate_clickables += soup.find_all(
                    tag.get_name(), attrs={attr: value})
        else:
            candidate_clickables = soup.find_all(tag.get_name())
        for candidate_clickable in candidate_clickables:
            if candidate_clickable in prev_clickables:
                continue
            if not cls._is_duplicate(clickables, candidate_clickable):
                clickable_id = candidate_clickable.get('id')
                if not clickable_id:
                    clickable_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                clickables.append(
                    Clickable(clickable_id,
                              cls._get_xpath(candidate_clickable),
                              tag.get_name()))
    return clickables
def get_clickables(cls, dom, prev_dom=None):
    """Return only newly-discovered clickables (and attached forms) in *dom*.

    Elements already present in *prev_dom* (clickables matching the
    registered clickable tags, and <form> elements) are skipped, so the
    result contains only clickables introduced since the previous DOM.

    Args:
        dom: HTML of the current page state.
        prev_dom: HTML of the previous page state, or None on first visit.

    Returns:
        list of Clickable objects; clickables found inside a new <form>
        carry a FormField populated with that form's inputs.
    """
    # Collect everything already known from the previous DOM.
    prev_clickables = []
    prev_forms = []
    if prev_dom:
        prev_soup = BeautifulSoup(prev_dom, 'html.parser')
        for tag in cls._clickable_tags:
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    prev_clickables += prev_soup.find_all(
                        tag.get_name(), attrs={attr: value})
            else:
                prev_clickables += prev_soup.find_all(tag.get_name())
        prev_forms = prev_soup.find_all('form')

    soup = BeautifulSoup(dom, 'html.parser')
    forms = soup.find_all('form')
    clickables = []

    # clickables with forms and inputs attached
    for form in forms:
        if form in prev_forms:
            continue
        form_id = form.get('id')
        if not form_id:
            form_id = cls.serial_prefix + str(cls._serial_num)
            cls._serial_num += 1
        f = FormField(form_id, cls._get_xpath(form))
        for input_type in cls.input_types:
            for my_input in form.find_all('input', attrs={'type': input_type}):
                data_set = InlineDataBank.get_data(input_type)
                if data_set:
                    value = random.choice(list(data_set))
                else:
                    # BUGFIX: string.lowercase / xrange are Python-2-only;
                    # ascii_lowercase / range behave identically and also
                    # work on Python 3.
                    value = ''.join(
                        random.choice(string.ascii_lowercase) for _ in range(8))
                input_id = my_input.get('id')
                if not input_id:
                    input_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                f.add_input(InputField(input_id, cls._get_xpath(my_input),
                                       input_type, value))
        for tag in cls._clickable_tags:
            # BUGFIX: accumulate matches over every attr/value pair instead
            # of keeping only the last pair's results (now consistent with
            # the prev_dom scan above).
            candidate_clickables = []
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    candidate_clickables += form.find_all(
                        tag.get_name(), attrs={attr: value})
            else:
                candidate_clickables = form.find_all(tag.get_name())
            for candidate_clickable in candidate_clickables:
                if candidate_clickable in prev_clickables:
                    continue
                clickable_id = candidate_clickable.get('id')
                if not clickable_id:
                    clickable_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                c = Clickable(clickable_id,
                              cls._get_xpath(candidate_clickable),
                              tag.get_name())
                c.add_form(f)
                clickables.append(c)

    # other clickables (not inside any form)
    for tag in cls._clickable_tags:
        # BUGFIX: same accumulation fix as the form scan above.
        candidate_clickables = []
        if tag.get_attr():
            for attr, value in tag.get_attr().items():
                candidate_clickables += soup.find_all(
                    tag.get_name(), attrs={attr: value})
        else:
            candidate_clickables = soup.find_all(tag.get_name())
        for candidate_clickable in candidate_clickables:
            if candidate_clickable in prev_clickables:
                continue
            if not cls._is_duplicate(clickables, candidate_clickable):
                clickable_id = candidate_clickable.get('id')
                if not clickable_id:
                    clickable_id = cls.serial_prefix + str(cls._serial_num)
                    cls._serial_num += 1
                clickables.append(
                    Clickable(clickable_id,
                              cls._get_xpath(candidate_clickable),
                              tag.get_name()))
    return clickables
def test_inline_databank(self):
    """Exercise InlineDataBank add/remove semantics and type bookkeeping."""
    from data_bank import InlineDataBank

    InlineDataBank.add_item('email', '*****@*****.**')
    InlineDataBank.add_item('email', '*****@*****.**')     # duplicated adding
    InlineDataBank.remove_item('email', '*****@*****.**')  # invalid removal
    InlineDataBank.remove_item('email', '*****@*****.**')  # valid removal
    self.assertEqual(len(InlineDataBank.get_data('email')), 2)

    # Adding under a previously unseen type creates that type on the fly.
    InlineDataBank.add_item('phone-number', '0912345678')
    InlineDataBank.add_item('phone-number', '0987654321')
    self.assertEqual(len(InlineDataBank.get_types()), 4)
    self.assertEqual(len(InlineDataBank.get_data('phone-number')), 2)

    # Unknown types yield None rather than an empty collection.
    self.assertIsNone(InlineDataBank.get_data('text'))