def set_clickable_tag(self, tag_name, attr=None, value=None):
     """Record a clickable-tag rule and register it with DomAnalyzer.

     The rule is appended to ``self._analyzer['clickable_tags']`` as a dict
     with the keys 'tag', 'attr' and 'value'; the same three values are then
     forwarded to ``DomAnalyzer.add_clickable_tag``.
     """
     rule = {'tag': tag_name, 'attr': attr, 'value': value}
     self._analyzer['clickable_tags'].append(rule)
     DomAnalyzer.add_clickable_tag(tag_name, attr, value)
    def save_state(self, state, depth):
        """Analyse every DOM of *state*, store the results on it and save it.

        For each entry returned by ``state.get_dom_list(self.configuration)``
        the candidate clickables, inputs, selects, checkboxes and radios are
        extracted with DomAnalyzer and grouped under the entry's iframe key.
        The grouped results and *depth* are then set on *state*, and finally
        ``self.save_dom(state)`` is called.
        """
        clickables_by_frame = {}
        inputs_by_frame = {}
        selects_by_frame = {}
        checkboxes_by_frame = {}
        radios_by_frame = {}

        for entry in state.get_dom_list(self.configuration):
            frame_path = entry['iframe_path']
            frame_dom = entry['dom']
            # key is the ';'-joined iframe path; an empty/None path maps to None
            if frame_path:
                frame_key = ';'.join(frame_path)
            else:
                frame_key = None

            clickables_by_frame[frame_key] = DomAnalyzer.get_candidate_clickables_soup(frame_dom)
            inputs_by_frame[frame_key] = DomAnalyzer.get_inputs(frame_dom)
            selects_by_frame[frame_key] = DomAnalyzer.get_selects(frame_dom)
            checkboxes_by_frame[frame_key] = DomAnalyzer.get_checkboxes(frame_dom)
            radios_by_frame[frame_key] = DomAnalyzer.get_radios(frame_dom)

        state.set_candidate_clickables(clickables_by_frame)
        state.set_inputs(inputs_by_frame)
        state.set_selects(selects_by_frame)
        state.set_checkboxes(checkboxes_by_frame)
        state.set_radios(radios_by_frame)
        state.set_depth(depth)

        self.save_dom(state)
    def save_state(self, state, depth):
        """Fill *state* with per-frame analysis results, then save its DOM.

        Iterates ``state.get_dom_list(self.configuration)``; every entry
        contributes one key (its ';'-joined 'iframe_path', or None when the
        path is empty) to five dicts holding the DomAnalyzer results for
        clickables, inputs, selects, checkboxes and radios. The dicts and
        *depth* are stored on *state* before ``self.save_dom(state)`` runs.
        """
        found_clickables, found_inputs, found_selects = {}, {}, {}
        found_checkboxes, found_radios = {}, {}

        for dom_entry in state.get_dom_list(self.configuration):
            path_parts = dom_entry['iframe_path']
            markup = dom_entry['dom']
            key = ';'.join(path_parts) if path_parts else None

            found_clickables[key] = DomAnalyzer.get_candidate_clickables_soup(markup)
            found_inputs[key] = DomAnalyzer.get_inputs(markup)
            found_selects[key] = DomAnalyzer.get_selects(markup)
            found_checkboxes[key] = DomAnalyzer.get_checkboxes(markup)
            found_radios[key] = DomAnalyzer.get_radios(markup)

        state.set_candidate_clickables(found_clickables)
        state.set_inputs(found_inputs)
        state.set_selects(found_selects)
        state.set_checkboxes(found_checkboxes)
        state.set_radios(found_radios)
        state.set_depth(depth)

        self.save_dom(state)
 def get_all_normalize_dom(self, configuration):
     """Return the normalized DOMs of this object joined by newlines.

     Uses ``self._dom_list`` when it is non-empty; otherwise the entries
     come from ``self.get_dom_list(configuration)``. Each entry's 'dom'
     value is passed through ``DomAnalyzer.normalize``.
     """
     if not self._dom_list:
         entries = self.get_dom_list(configuration)
     else:
         entries = self._dom_list
     normalized = [DomAnalyzer.normalize(entry['dom']) for entry in entries]
     return "\n".join(normalized)
Beispiel #5
0
    def crawl(self, depth, prev_state=None):
        """Recursively fire the clickables of the automata's current state.

        Nothing happens when *depth* exceeds the configured maximum depth or
        when ``self.violate_invariant`` reports a violation for the current
        state. Otherwise every clickable returned by
        ``DomAnalyzer.get_clickables`` is fired in a FireEventThread. If the
        thread is still alive after the join timeout, the execution stack and
        the clickable counters are logged and the program exits. When firing
        changed the DOM, the clickable, the resulting state and the edge are
        recorded; a newly added state is crawled recursively at ``depth + 1``
        before the crawler backtracks to the current state.
        """
        if not depth <= self.configuration.get_max_depth():
            return
        cs = self.automata.get_current_state()
        if self.violate_invariant(cs.get_dom(), cs.get_id()):
            return

        current_dom = cs.get_dom()
        previous_dom = prev_state.get_dom() if prev_state else None
        candidates = DomAnalyzer.get_clickables(current_dom, previous_dom)
        self.num_clickables['unexamined'] += len(candidates)

        for clickable in candidates:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = cs.get_id() + '-' + clickable.get_id() + '.png'
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', cs.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            ft = FireEventThread(self.executor, clickable)
            ft.start()
            join_timeout = self.configuration.get_sleep_time() * 2
            ft.join(join_timeout)
            if ft.is_alive():  # still running after the timeout
                logger.error('No response while firing an event. Execution sequences:')
                # include the clickable that triggered the missing response
                self.exe_stack.append(clickable)
                for fired in self.exe_stack:
                    logger.error(fired)
                stats = self.num_clickables
                logger.error('Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
                             stats['unexamined'] + stats['true'] + stats['false'],
                             stats['true'],
                             stats['false'],
                             stats['unexamined'])
                logger.error('Program terminated.')
                sys.exit()
            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1

            new_dom = self.executor.get_source()
            if DomAnalyzer.is_equal(cs.get_dom(), new_dom):
                # the DOM did not change: count the clickable as 'false'
                self.num_clickables['false'] += 1
                continue

            self.num_clickables['true'] += 1
            cs.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            ns, is_newly_added = self.automata.add_state(State(new_dom))
            self.automata.add_edge(cs, ns, clickable)
            if is_newly_added:
                state_image = self.executor.get_screenshot()
                self.save_screenshot(ns.get_id() + '.png', state_image, 'state')
                self.save_dom(ns)
                self.automata.change_state(ns)
                self.crawl(depth + 1, cs)
            self.exe_stack.pop()
            self.automata.change_state(cs)
            self.backtrack(cs)
 def set_tag_with_attribute_normalizer(self, tag_name, attr=None, value=None, mode=None):
     """Record a tag-with-attribute normalizer rule and register it.

     A dict with the keys 'tag', 'attr', 'value' and 'mode' is appended to
     ``self._analyzer['tag_with_attribute_normalizers']``; the same values
     are then passed to ``DomAnalyzer.add_tag_with_attribute_normalizer``.
     """
     rule = {'tag': tag_name, 'attr': attr, 'value': value, 'mode': mode}
     self._analyzer['tag_with_attribute_normalizers'].append(rule)
     DomAnalyzer.add_tag_with_attribute_normalizer(tag_name, attr, value, mode)
Beispiel #7
0
 def get_all_normalize_dom(self, configuration):
     """Join the normalized 'dom' of every DOM entry with newlines.

     The entries are ``self._dom_list`` when that is truthy, otherwise the
     result of ``self.get_dom_list(configuration)``. Normalization is done
     by ``DomAnalyzer.normalize``.
     """
     dom_entries = self._dom_list or self.get_dom_list(configuration)
     pieces = []
     for dom_entry in dom_entries:
         pieces.append(DomAnalyzer.normalize(dom_entry['dom']))
     return "\n".join(pieces)
    def get_dom_list(self, configuration):
        """Collect the DOM of the current page and, optionally, of its frames.

        The page source is parsed with BeautifulSoup ('html5lib'). For every
        tag whose name is listed in ``configuration.get_frame_tags()`` the
        frame's DOM is gathered through ``self.get_dom_of_iframe`` when
        ``configuration.is_dom_inside_iframe()`` is true, and the tag is then
        emptied with ``clear()`` in the top-level soup. Errors raised while
        handling one frame are logged and do not stop the loop.

        Returns:
            tuple: ``(dom_list, url)``. ``dom_list`` holds dicts with the keys
            'url', 'dom' and 'iframe_path'; the top-level document is appended
            last with 'iframe_path' set to None.
        """
        # save dom of iframe in list of StateDom [iframe_path_list, dom, url/src, normalize dom]
        dom_list = []
        new_dom = self.switch_iframe_and_get_source()

        url = self.get_url()
        soup = BeautifulSoup(new_dom, 'html5lib')
        for frame in configuration.get_frame_tags():
            for iframe_tag in soup.find_all(frame):
                iframe_xpath = DomAnalyzer._get_xpath(iframe_tag)
                iframe_src = iframe_tag['src'] if iframe_tag.has_attr(
                    'src') else None
                try:  # not knowing what error in iframe_tag.clear(): no src
                    if configuration.is_dom_inside_iframe():
                        self.get_dom_of_iframe(configuration, dom_list,
                                               [iframe_xpath], iframe_src)
                    iframe_tag.clear()
                except Exception as e:
                    logging.error(
                        ' get_dom_of_iframe: %s \t\t__from crawler.py get_dom_list() ',
                        str(e))
        dom_list.append({
            'url': url,
            'dom': str(soup),
            'iframe_path': None,
        })

        return dom_list, url
Beispiel #9
0
    def get_dom_of_iframe(self, configuration, dom_list, iframe_xpath_list,
                          src):
        """Append the DOM of one frame, and of its same-domain sub-frames, to *dom_list*.

        Args:
            configuration: provides ``get_frame_tags()`` and is passed to
                ``self.is_same_domain``.
            dom_list: list that receives one dict per frame with the keys
                'url', 'dom' and 'iframe_path'.
            iframe_xpath_list: list of xpaths leading from the top document to
                this frame; it is handed to
                ``self.switch_iframe_and_get_source`` and stored as this
                frame's 'iframe_path'. It is not modified.
            src: value stored as this frame's 'url'.

        Nested frames with a 'src' accepted by ``self.is_same_domain`` are
        processed recursively (their entries are appended before this
        frame's) and then emptied in this frame's soup. Errors raised while
        processing a nested frame are logged and do not stop the loop.
        """
        dom = self.switch_iframe_and_get_source(iframe_xpath_list)
        soup = BeautifulSoup(dom, 'html5lib')
        for frame in configuration.get_frame_tags():
            for iframe_tag in soup.find_all(frame):
                iframe_xpath = DomAnalyzer._get_xpath(iframe_tag)
                iframe_src = iframe_tag['src'] if iframe_tag.has_attr(
                    'src') else None
                if iframe_src and self.is_same_domain(configuration,
                                                      iframe_src):
                    # Build a fresh path for the child instead of appending to
                    # the shared list: appending in place made sibling frames
                    # accumulate each other's xpaths and made every stored
                    # 'iframe_path' alias the same, ever-growing list.
                    child_xpath_list = list(iframe_xpath_list) + [iframe_xpath]
                    try:
                        print('_2:', iframe_xpath, '  : ', iframe_src)
                        self.get_dom_of_iframe(configuration, dom_list,
                                               child_xpath_list, iframe_src)
                        iframe_tag.clear()
                    except Exception as e:
                        logging.error(
                            ' get_dom_of_iframe: %s \t\t__from crawler.py get_dom_list() ',
                            str(e))

        dom_list.append({
            'url': src,
            'dom': str(soup.prettify()),
            'iframe_path': iframe_xpath_list,
        })
Beispiel #10
0
 def get_clickable(self):
     """Check the clickables DomAnalyzer finds in a small account-setup form.

     Expects four clickables; the fourth must belong to exactly one form
     with two inputs, the second being the e-mail input.
     """
     markup = '''
     <html><body>
     <form data-prop="formNode" class="sup-form sup-account-form">
     <p>
     <input placeholder="Your name" data-prop="nameNode" data-event="input:onInfoInput" class="sup-info-name" data-l10n-id="setup-info-name" x-inputmode="verbatim" inputmode="verbatim" dir="auto" required="" type="text">
     <button type="reset"></button>
     </p>
     <p>
     <input placeholder="*****@*****.**" data-prop="emailNode" data-event="input:onInfoInput" class="sup-info-email" data-l10n-id="setup-info-email" dir="auto" required="" type="email">
     <button type="reset"></button>
     </p>
     <p>
     <button data-prop="nextButton" data-event="click:onNext" class="sup-info-next-btn recommend" disabled="">
       <span data-l10n-id="setup-info-next">Next</span>
     </button>
     <button data-prop="manualConfig" data-event="click:onClickManualConfig" class="sup-manual-config-btn" disabled="" data-l10n-id="setup-manual-config2">Manual setup</button>
     </p>
     </form>
     </body></html>
     '''
     clickables = DomAnalyzer.get_clickables(markup)
     self.assertEqual(len(clickables), 4)

     fourth = clickables[3]
     self.assertEqual(len(fourth.get_forms()), 1)

     form = fourth.get_forms()[0]
     self.assertEqual(len(form.get_inputs()), 2)

     second_input = form.get_inputs()[1]
     self.assertEqual(second_input.get_xpath(), '//html/body/form[1]/p[2]/input[1]')
     self.assertEqual(form.get_inputs()[1].get_type(), 'email')
     
     '''
Beispiel #11
0
    def analysis_with_other_browser( self ):
        """Compare the current page of both executors and report the result.

        1. The URLs returned by ``get_dom_list`` of ``self.executor`` and
           ``self.other_executor`` are compared (they should be the same).
        2. The 'dom' of the first entry of each DOM list is compared with
           ``DomAnalyzer.is_equal`` (they should be the same).

        Each outcome is printed and written with ``logging.info``.
        """
        first_doms, first_url = self.executor.get_dom_list(self.configuration)
        second_doms, second_url = self.other_executor.get_dom_list(self.configuration)

        if first_url != second_url:
            print ("===different pages")
            logging.info('CBT:browser 1 page:%s browser 2 page:%s| page are different', first_url, second_url )
        elif first_url == second_url:
            message = "url is same: " + self.executor.get_url()
            print(message)
            logging.info(' CBT_events : ' + message)

        # (a direct dom_list1 == dom_list2 comparison used to be sketched here
        #  and was disabled in favour of DomAnalyzer.is_equal)
        # NOTE(review): only entry 0 of each list is compared - confirm that
        # this is the intended document when frames are collected too.
        same_tree = DomAnalyzer.is_equal(first_doms[0]['dom'], second_doms[0]['dom'])
        if not same_tree:
            print ("===different dom_tree")
            # need domtree mapping
            logging.info('CBT: browser 1 dom:%s browser 2 dom:%s| dom trees are different', first_url, second_url )
            return
        print("===same dom tree")
        logging.info('CBT: same dom tree')
    def get_dom_list(self, configuration):
        """Collect the DOM of the current page and of its same-domain frames.

        The page source is parsed with BeautifulSoup ('html5lib'). A frame tag
        (names from ``configuration.get_frame_tags()``) is descended into only
        when ``configuration.is_dom_inside_iframe()`` is true, the tag has a
        'src', and ``self.is_same_domain`` accepts that 'src'; its DOM is then
        gathered by ``self.get_dom_of_iframe`` and the tag is emptied in the
        top-level soup. Errors for one frame are logged and skipped.

        Returns:
            tuple: ``(dom_list, url)``. ``dom_list`` holds dicts with the keys
            'url', 'dom' and 'iframe_path'; the prettified top-level document
            is appended last with 'iframe_path' set to None.
        """
        # save dom of iframe in list of StateDom [iframe_path_list, dom, url/src, normalize dom]
        collected = []
        page_source = self.get_source()
        url = self.get_url()
        soup = BeautifulSoup(page_source, 'html5lib')
        for frame_name in configuration.get_frame_tags():
            for tag in soup.find_all(frame_name):
                tag_xpath = DomAnalyzer._get_xpath(tag)
                tag_src = tag['src'] if tag.has_attr('src') else None
                descend = (configuration.is_dom_inside_iframe()
                           and tag_src
                           and self.is_same_domain(configuration, tag_src))
                if not descend:
                    continue
                try:  # not knowing what error in iframe_tag.clear(): no src
                    print('_1:', tag_xpath, '  : ', tag_src)
                    self.get_dom_of_iframe(configuration, collected, [tag_xpath], tag_src)
                    tag.clear()
                except Exception as e:
                    logging.error(' get_dom_of_iframe: %s \t\t__from crawler.py get_dom_list() ', str(e))

        top_level = {
            'url': url,
            'dom': str(soup.prettify()),
            'iframe_path': None,
        }
        collected.append(top_level)

        return collected, url
Beispiel #13
0
 def add_new_events(self, state, prev_state, depth):
     """Queue one action event per clickable reported for *state*.

     ``DomAnalyzer.get_clickables`` is iterated as (clickables, iframe_key)
     pairs; a falsy *prev_state* is passed on as None. Every clickable is
     appended to ``self.crawler.action_events`` as a dict with the keys
     'state', 'action' and 'depth'.
     """
     clickable_groups = DomAnalyzer.get_clickables(state, prev_state or None)
     for group, iframe_key in clickable_groups:
         for clickable in group:
             action = {'clickable': clickable, 'iframe_key': iframe_key}
             event = {'state': state, 'action': action, 'depth': depth}
             self.crawler.action_events.append(event)
Beispiel #14
0
 def check_dom_tree(self):
     """Report whether two pages and their DOM trees are the same.

     Prints and logs a message when the two URLs differ, then prints and
     logs whether ``DomAnalyzer.is_equal`` considers the two DOM trees equal.

     NOTE(review): url1, url2, dom_tree1 and dom_tree2 are never assigned in
     this method. Unless they exist as module-level names, calling it raises
     NameError - the code that fetched them appears to be missing; confirm
     against the original source.
     """
   
     
     if url1!=url2:
         print ("===different pages")
         logging.info('CBT:browser 1 page:%s browser 2 page:%s| page are different', url1, url2 )
     if DomAnalyzer.is_equal(dom_tree1,dom_tree2):
         print("===same dom tree")
         logging.info('CBT: same dom tree')
     else:
         print ("===different dom_tree")
         # a DOM-tree mapping is still needed here
         logging.info('CBT: browser 1 dom:%s browser 2 dom:%s| dom trees are different', url1, url2 )
Beispiel #15
0
 def add_state(self, state):
     """Add *state* to the automata unless a state with an equal DOM exists.

     Returns:
         tuple: ``(existing_state, False)`` when ``DomAnalyzer.is_equal``
         matches the DOM of an already stored state, otherwise
         ``(state, True)`` after *state* was given an id and stored.
     """
     if self._initial_state:
         # non-empty automata: look for a stored state with the same DOM
         for known in self._states:
             if DomAnalyzer.is_equal(known.get_dom(), state.get_dom()):
                 return known, False
     else:
         # the first state added becomes both the initial and current state
         self._initial_state = state
         self._current_state = state

     # keep an id the state already carries, else use the insertion index
     state.set_id(state.get_id() or str(len(self._states)))
     self._states.append(state)
     return state, True
Beispiel #16
0
 def test_get_xpath(self):
     """``DomAnalyzer._get_xpath`` must index the form inside the second div."""
     markup = '''
     <html><body>
       <div></div>
       <div>
         <form>
           <input><button></button>
         </form>
       </div>
     </body></html>
     '''
     parsed = BeautifulSoup(markup, 'html.parser')
     form_xpath = DomAnalyzer._get_xpath(parsed.find('form'))
     self.assertEqual(form_xpath, '//html/body/div[2]/form[1]')
Beispiel #17
0
 def add_state(self, state):
     """Store *state*, or return the stored state whose DOM equals its DOM.

     Returns:
         tuple: ``(stored_state, is_new)``. ``is_new`` is False when an
         already stored state has an equal DOM (per
         ``DomAnalyzer.is_equal``); in that case *state* is not stored.
     """
     if not self._initial_state:
         # empty automata: this state is where everything starts
         self._initial_state = self._current_state = state
     else:
         new_dom_owner = state
         for existing in self._states:
             if DomAnalyzer.is_equal(existing.get_dom(), new_dom_owner.get_dom()):
                 return existing, False

     current_id = state.get_id()
     if not current_id:
         # states without an id are numbered by their position in the list
         current_id = str(len(self._states))
     state.set_id(current_id)
     self._states.append(state)
     return state, True
Beispiel #18
0
    def add_new_events(self, state, prev_state, depth):
        """Queue one randomly chosen clickable of *state* as an action event.

        All (clickable, iframe_key) pairs reported by
        ``DomAnalyzer.get_clickables`` are gathered (a falsy *prev_state* is
        passed on as None). When there are none the method returns without
        queuing anything; otherwise one pair is picked with
        ``random.choice``, appended to ``self.crawler.action_events`` and
        printed.
        """
        pairs = [
            (clickable, iframe_key)
            for clickables, iframe_key in DomAnalyzer.get_clickables(state, prev_state or None)
            for clickable in clickables
        ]
        if not pairs:
            return

        chosen, chosen_key = random.choice(pairs)
        event = {
            'state': state,
            'action': {'clickable': chosen, 'iframe_key': chosen_key},
            'depth': depth,
        }
        self.crawler.action_events.append(event)
        print(state.get_id(), chosen.get_id(), chosen.get_xpath())
Beispiel #19
0
    def save_dom(self, state):
        """Write the DOMs of *state* and their analysis results to disk.

        A directory named after the state id is created below
        ``self.configuration.get_abs_path('dom')``. The top-level document is
        written there; every entry with an 'iframe_path' gets its own
        numbered sub-directory. Per entry the raw DOM, the normalized DOM and
        the JSON dumps of inputs, selects, radios, checkboxes and candidate
        clickables are written, followed by one 'iframe_list.json' index.
        Finally ``state.clear_dom()`` is called. Any exception is logged and
        swallowed.
        """
        json_options = dict(indent=2, sort_keys=True, ensure_ascii=False)
        # (file suffix, name of the state getter) per JSON dump, in write order
        json_dumps = (
            ('_inputs.txt', 'get_inputs_json'),
            ('_selects.txt', 'get_selects_json'),
            ('_radios.txt', 'get_radios_json'),
            ('_checkboxes.txt', 'get_checkboxes_json'),
            ('_clicks.txt', 'get_candidate_clickables_json'),
        )
        try:
            # make dir for each state
            state_dir = os.path.join(self.configuration.get_abs_path('dom'), state.get_id())
            if not os.path.isdir(state_dir):
                os.makedirs(state_dir)

            iframe_index = {'num': 0}
            for entry in state.get_dom_list(self.configuration):
                frame_path = entry['iframe_path']
                iframe_key = ';'.join(frame_path) if frame_path else None
                if frame_path:
                    # make new dir for iframe, numbered in order of appearance
                    iframe_index['num'] += 1
                    slot = str(iframe_index['num'])
                    iframe_index[slot] = {'path': frame_path, 'url': entry['url']}
                    dom_dir = os.path.join(self.configuration.get_abs_path('dom'), state.get_id(), slot)
                    if not os.path.isdir(dom_dir):
                        os.makedirs(dom_dir)
                else:
                    iframe_index['basic'] = {'url': entry['url']}
                    dom_dir = os.path.join(self.configuration.get_abs_path('dom'), state.get_id())

                raw_path = os.path.join(dom_dir, state.get_id() + '.txt')
                with codecs.open(raw_path, 'w', encoding='utf-8') as out:
                    out.write(entry['dom'])
                normalized_path = os.path.join(dom_dir, state.get_id() + '_nor.txt')
                with codecs.open(normalized_path, 'w', encoding='utf-8') as out:
                    out.write(DomAnalyzer.normalize(entry['dom']))
                for suffix, getter_name in json_dumps:
                    json_path = os.path.join(dom_dir, state.get_id() + suffix)
                    with codecs.open(json_path, 'w', encoding='utf-8') as out:
                        json.dump(getattr(state, getter_name)(iframe_key), out, **json_options)

            index_path = os.path.join(state_dir, 'iframe_list.json')
            with codecs.open(index_path, 'w', encoding='utf-8') as out:
                json.dump(iframe_index, out, **json_options)

            # TODO: turn TempFile stateDom into FilePath stateDom
            state.clear_dom()

        except Exception as e:
            logging.error(' save dom : %s \t\t__from automata.py save_dom()', str(e))
Beispiel #20
0
 def get_dom_of_iframe(self, configuration, dom_list, iframe_xpath_list, src):
     """Append the DOM of one frame, and of all frames nested in it, to *dom_list*.

     Args:
         configuration: provides ``get_frame_tags()``.
         dom_list: list that receives one dict per frame with the keys
             'url', 'dom' and 'iframe_path'.
         iframe_xpath_list: list of xpaths leading from the top document to
             this frame; it is handed to
             ``self.switch_iframe_and_get_source`` and stored as this
             frame's 'iframe_path'. It is not modified.
         src: value stored as this frame's 'url'.

     Every nested frame tag is processed recursively (its entry is appended
     before this frame's) and then emptied in this frame's soup. Errors
     raised while processing a nested frame are logged and do not stop the
     loop.
     """
     dom = self.switch_iframe_and_get_source(iframe_xpath_list)
     soup = BeautifulSoup(dom, 'html5lib')
     for frame in configuration.get_frame_tags():
         for iframe_tag in soup.find_all(frame):
             iframe_xpath = DomAnalyzer._get_xpath(iframe_tag)
             # Build a fresh path for the child instead of appending to the
             # shared list: appending in place made sibling frames accumulate
             # each other's xpaths and made every stored 'iframe_path' alias
             # the same, ever-growing list.
             child_xpath_list = list(iframe_xpath_list) + [iframe_xpath]
             iframe_src = iframe_tag['src'] if iframe_tag.has_attr('src') else None
             try:
                 self.get_dom_of_iframe(configuration, dom_list, child_xpath_list, iframe_src)
                 iframe_tag.clear()
             except Exception as e:
                 logging.error(' get_dom_of_iframe: %s \t\t__from crawler.py get_dom_list() ', str(e))
     dom_list.append( {
             'url' : src,
             'dom' : str(soup),
             'iframe_path' : iframe_xpath_list,
         } )
 def set_simple_clickable_tags(self):
     """Flag 'simple_clickable_tags' in ``self._analyzer`` and apply it to DomAnalyzer."""
     option = 'simple_clickable_tags'
     self._analyzer[option] = True
     DomAnalyzer.set_simple_clickable_tags()
 def set_tag_with_attribute_normalizer(self, tag_name, attr=None, value=None, mode=None):
     """Store a tag-with-attribute normalizer rule and hand it to DomAnalyzer.

     The rule dict ('tag', 'attr', 'value', 'mode') is appended to
     ``self._analyzer['tag_with_attribute_normalizers']`` before
     ``DomAnalyzer.add_tag_with_attribute_normalizer`` is called.
     """
     normalizer = {
         'tag': tag_name,
         'attr': attr,
         'value': value,
         'mode': mode,
     }
     self._analyzer['tag_with_attribute_normalizers'].append(normalizer)
     DomAnalyzer.add_tag_with_attribute_normalizer(tag_name, attr, value, mode)
 def set_attributes_normalizer(self, attrs):
     """Add *attrs* to the recorded attribute normalizers and to DomAnalyzer.

     *attrs* is added with ``+=`` to ``self._analyzer['attributes_normalizer']``
     and then passed to ``DomAnalyzer.add_attributes_normalizer``.
     """
     key = 'attributes_normalizer'
     self._analyzer[key] += attrs
     DomAnalyzer.add_attributes_normalizer(attrs)
 def set_tags_normalizer(self, tags):
     """Add *tags* to the recorded tag normalizers and to DomAnalyzer.

     *tags* is added with ``+=`` to ``self._analyzer['tag_normalizers']`` and
     then passed to ``DomAnalyzer.add_tags_normalizer``.
     """
     key = 'tag_normalizers'
     self._analyzer[key] += tags
     DomAnalyzer.add_tags_normalizer(tags)
 def set_tags_normalizer(self, tags):
     """Record extra tag normalizers locally, then register them with DomAnalyzer."""
     # spelled-out form of: self._analyzer['tag_normalizers'] += tags
     recorded = self._analyzer['tag_normalizers']
     recorded += tags
     self._analyzer['tag_normalizers'] = recorded
     DomAnalyzer.add_tags_normalizer(tags)
Beispiel #26
0
    def add_remove_clickable_tags(self):
        """Disabled check for adding and removing DomAnalyzer clickable tags.

        The method returns immediately, so nothing below the first statement
        runs; the remaining body is kept as written.
        """
        # early return: everything after this line is unreachable
        return
        dom = '''
        <html><body>
        <form data-prop="formNode" class="sup-form sup-account-form">
        <p>
        <input placeholder="Your name" data-prop="nameNode" data-event="input:onInfoInput" class="sup-info-name" data-l10n-id="setup-info-name" x-inputmode="verbatim" inputmode="verbatim" dir="auto" required="" type="text">
        <button type="reset">reset1</button>
        </p>
        <p>
        <input placeholder="*****@*****.**" data-prop="emailNode" data-event="input:onInfoInput" class="sup-info-email" data-l10n-id="setup-info-email" dir="auto" required="" type="email">
        <button type="reset">reset2</button>
        </p>
        <p>
        <button data-prop="nextButton" data-event="click:onNext" class="sup-info-next-btn recommend" disabled="">
          <span data-l10n-id="setup-info-next">Next</span>
        </button>
        <button data-prop="manualConfig" data-event="click:onClickManualConfig" class="sup-manual-config-btn" disabled="" data-l10n-id="setup-manual-config2">Manual setup</button>
        </p>
        </form>
        </body></html>
        '''

        # expected initial tags:
        # Tag('a'), Tag('button'), Tag('input', {'type': 'submit'}), Tag('input', {'type': 'button'})
        self.assertEqual(len(DomAnalyzer.get_clickable_tags()), 4)
        DomAnalyzer.remove_clickable_tags(Tag('a'))
        DomAnalyzer.remove_clickable_tags(Tag('button'))
        # Tag('button') is removed a second time here
        DomAnalyzer.remove_clickable_tags(Tag('button'))
        DomAnalyzer.remove_clickable_tags(Tag('input', {'type': 'submit'}))
        DomAnalyzer.remove_clickable_tags(Tag('input', {'type': 'button'}))
        self.assertEqual(len(DomAnalyzer.get_clickable_tags()), 0)
        # with only reset buttons registered, two clickables are expected in the form above
        DomAnalyzer.add_clickable_tags(Tag('button', {'type': 'reset'}))
        self.assertEqual(len(DomAnalyzer.get_clickables(dom)), 2)
 def set_simple_inputs_tags(self):
     """Flag 'simple_inputs_tags' in ``self._analyzer`` and apply it to DomAnalyzer."""
     option = 'simple_inputs_tags'
     self._analyzer[option] = True
     DomAnalyzer.set_simple_inputs_tags()
 def set_simple_inputs_tags(self):
     """Record that the simple input tags are in use and enable them.

     Sets ``self._analyzer['simple_inputs_tags']`` to True, then calls
     ``DomAnalyzer.set_simple_inputs_tags()``.
     """
     enabled = True
     self._analyzer['simple_inputs_tags'] = enabled
     DomAnalyzer.set_simple_inputs_tags()
 def set_simple_normalizers(self):
     """Flag 'simple_normalizers' in ``self._analyzer`` and apply it to DomAnalyzer."""
     option = 'simple_normalizers'
     self._analyzer[option] = True
     DomAnalyzer.set_simple_normalizers()
Beispiel #30
0
    def crawl(self, depth, prev_state=None):
        """Explore the automata depth-first, starting at its current state.

        Returns immediately when *depth* is above
        ``self.configuration.get_max_depth()`` or when the current state
        violates an invariant. Otherwise each clickable found by
        ``DomAnalyzer.get_clickables`` is fired in a FireEventThread that is
        joined with a timeout of twice the configured sleep time. A thread
        that is still alive afterwards makes the method log the execution
        stack and the clickable counters and exit the program. A clickable
        that changes the DOM is recorded with its screenshot, target state
        and edge; newly added states are crawled at ``depth + 1`` before
        backtracking to the current state.
        """
        too_deep = not depth <= self.configuration.get_max_depth()
        if too_deep:
            return

        cs = self.automata.get_current_state()
        if self.violate_invariant(cs.get_dom(), cs.get_id()):
            return

        dom_now = cs.get_dom()
        dom_before = prev_state.get_dom() if prev_state else None
        candidate_clickables = DomAnalyzer.get_clickables(dom_now, dom_before)
        self.num_clickables['unexamined'] += len(candidate_clickables)

        for clickable in candidate_clickables:
            # prefetch image of the clickable
            time.sleep(0.2)  # time for correctly fetching image
            img_name = cs.get_id() + '-' + clickable.get_id() + '.png'
            img_data = self.executor.get_screenshot(clickable)

            # fire the clickable
            logger.debug('Fire event in state %s', cs.get_id())
            self.executor.empty_form(clickable)
            self.executor.fill_form(clickable)
            worker = FireEventThread(self.executor, clickable)
            worker.start()
            # time out after sleep_time*2 seconds
            worker.join(self.configuration.get_sleep_time() * 2)
            if worker.is_alive():  # timed out
                logger.error(
                    'No response while firing an event. Execution sequences:')
                # add the clickable triggering No Response
                self.exe_stack.append(clickable)
                for executed in self.exe_stack:
                    logger.error(executed)
                counters = self.num_clickables
                total_found = (counters['unexamined'] + counters['true'] +
                               counters['false'])
                logger.error(
                    'Total clickables found: %d (true: %d, false: %d, unexamined: %d)',
                    total_found, counters['true'], counters['false'],
                    counters['unexamined'])
                logger.error('Program terminated.')
                sys.exit()
            time.sleep(self.configuration.get_sleep_time())
            self.num_clickables['unexamined'] -= 1

            new_dom = self.executor.get_source()
            dom_unchanged = DomAnalyzer.is_equal(cs.get_dom(), new_dom)
            if dom_unchanged:
                self.num_clickables['false'] += 1
                continue

            self.num_clickables['true'] += 1
            cs.add_clickable(clickable)
            self.exe_stack.append(clickable)
            self.save_screenshot(img_name, img_data, 'clickable')
            ns, is_newly_added = self.automata.add_state(State(new_dom))
            self.automata.add_edge(cs, ns, clickable)
            if is_newly_added:
                self.save_screenshot(ns.get_id() + '.png',
                                     self.executor.get_screenshot(), 'state')
                self.save_dom(ns)
                self.automata.change_state(ns)
                self.crawl(depth + 1, cs)
            self.exe_stack.pop()
            self.automata.change_state(cs)
            self.backtrack(cs)
 def set_inputs_tag(self, input_type):
     """Record *input_type* as an input tag and register it with DomAnalyzer.

     The value is appended to ``self._analyzer['inputs_tags']`` and then
     passed to ``DomAnalyzer.add_inputs_tag``.
     """
     recorded_types = self._analyzer['inputs_tags']
     recorded_types.append(input_type)
     DomAnalyzer.add_inputs_tag(input_type)
 def set_clickable_tag(self, tag_name, attr=None, value=None):
     """Store a clickable-tag rule and hand it to DomAnalyzer.

     The rule dict ('tag', 'attr', 'value') is appended to
     ``self._analyzer['clickable_tags']`` before
     ``DomAnalyzer.add_clickable_tag`` is called with the same values.
     """
     clickable_rule = {
         'tag': tag_name,
         'attr': attr,
         'value': value,
     }
     self._analyzer['clickable_tags'].append(clickable_rule)
     DomAnalyzer.add_clickable_tag(tag_name, attr, value)
 def set_attributes_normalizer(self, attrs):
     """Record extra attribute normalizers locally, then register them with DomAnalyzer."""
     # spelled-out form of: self._analyzer['attributes_normalizer'] += attrs
     recorded = self._analyzer['attributes_normalizer']
     recorded += attrs
     self._analyzer['attributes_normalizer'] = recorded
     DomAnalyzer.add_attributes_normalizer(attrs)
 def set_inputs_tag(self, input_type):
     """Append *input_type* to the recorded input tags and add it to DomAnalyzer."""
     section = 'inputs_tags'
     self._analyzer[section].append(input_type)
     DomAnalyzer.add_inputs_tag(input_type)
 def set_simple_clickable_tags(self):
     """Record that the simple clickable tags are in use and enable them.

     Sets ``self._analyzer['simple_clickable_tags']`` to True, then calls
     ``DomAnalyzer.set_simple_clickable_tags()``.
     """
     enabled = True
     self._analyzer['simple_clickable_tags'] = enabled
     DomAnalyzer.set_simple_clickable_tags()
 def is_normalize_equal(self, list_dom, new_dom):
     """Return the result of ``DomAnalyzer.is_normalize_equal(list_dom, new_dom)`` unchanged."""
     verdict = DomAnalyzer.is_normalize_equal(list_dom, new_dom)
     return verdict
 def set_simple_normalizers(self):
     """Record that the simple normalizers are in use and enable them.

     Sets ``self._analyzer['simple_normalizers']`` to True, then calls
     ``DomAnalyzer.set_simple_normalizers()``.
     """
     enabled = True
     self._analyzer['simple_normalizers'] = enabled
     DomAnalyzer.set_simple_normalizers()
Beispiel #38
0
    def save_dom(self, state):
        """Persist every DOM of *state* together with its analysis results.

        Below ``self.configuration.get_abs_path('dom')`` a directory named
        after the state id is created. Entries without an 'iframe_path' are
        written directly into it; each entry with one gets a numbered
        sub-directory. For every entry the raw DOM, the normalized DOM and
        the JSON of inputs, selects, radios, checkboxes and candidate
        clickables are written; afterwards 'iframe_list.json' records the
        numbering. ``state.clear_dom()`` is called at the end. Every
        exception is logged and swallowed.
        """

        def open_utf8(directory, file_name):
            # all artifacts are UTF-8 text files opened for (over)writing
            return codecs.open(os.path.join(directory, file_name),
                               'w',
                               encoding='utf-8')

        def dump_json(payload, handle):
            json.dump(payload,
                      handle,
                      indent=2,
                      sort_keys=True,
                      ensure_ascii=False)

        try:
            # make dir for each state
            state_dir = os.path.join(self.configuration.get_abs_path('dom'),
                                     state.get_id())
            if not os.path.isdir(state_dir):
                os.makedirs(state_dir)

            iframe_key_dict = {'num': 0}
            for dom_entry in state.get_dom_list(self.configuration):
                path_parts = dom_entry['iframe_path']
                if path_parts:
                    iframe_key = ';'.join(path_parts)
                    # make new dir for iframe
                    iframe_key_dict['num'] += 1
                    number = str(iframe_key_dict['num'])
                    iframe_key_dict[number] = {
                        'path': path_parts,
                        'url': dom_entry['url'],
                    }
                    dom_dir = os.path.join(
                        self.configuration.get_abs_path('dom'),
                        state.get_id(), number)
                    if not os.path.isdir(dom_dir):
                        os.makedirs(dom_dir)
                else:
                    iframe_key = None
                    iframe_key_dict['basic'] = {'url': dom_entry['url']}
                    dom_dir = os.path.join(
                        self.configuration.get_abs_path('dom'),
                        state.get_id())

                with open_utf8(dom_dir, state.get_id() + '.txt') as f:
                    f.write(dom_entry['dom'])
                with open_utf8(dom_dir, state.get_id() + '_nor.txt') as f:
                    f.write(DomAnalyzer.normalize(dom_entry['dom']))
                with open_utf8(dom_dir, state.get_id() + '_inputs.txt') as f:
                    dump_json(state.get_inputs_json(iframe_key), f)
                with open_utf8(dom_dir, state.get_id() + '_selects.txt') as f:
                    dump_json(state.get_selects_json(iframe_key), f)
                with open_utf8(dom_dir, state.get_id() + '_radios.txt') as f:
                    dump_json(state.get_radios_json(iframe_key), f)
                with open_utf8(dom_dir,
                               state.get_id() + '_checkboxes.txt') as f:
                    dump_json(state.get_checkboxes_json(iframe_key), f)
                with open_utf8(dom_dir, state.get_id() + '_clicks.txt') as f:
                    dump_json(state.get_candidate_clickables_json(iframe_key),
                              f)

            with open_utf8(state_dir, 'iframe_list.json') as f:
                dump_json(iframe_key_dict, f)

            # TODO: turn TempFile stateDom into FilePath stateDom
            state.clear_dom()

        except Exception as e:
            logging.error(' save dom : %s \t\t__from automata.py save_dom()',
                          str(e))