Exemple #1
0
 def test_inline_databank(self):
     from data_bank import InlineDataBank
     InlineDataBank.add_item('email', '*****@*****.**')
     InlineDataBank.add_item('email', '*****@*****.**')  # duplicated adding
     InlineDataBank.remove_item('email', '*****@*****.**')  # invalid removal
     InlineDataBank.remove_item('email', '*****@*****.**')  # valid removal
     self.assertEqual(len(InlineDataBank.get_data('email')), 2)
     InlineDataBank.add_item('phone-number', '0912345678')   # add data into a new type
     InlineDataBank.add_item('phone-number', '0987654321')
     self.assertEqual(len(InlineDataBank.get_types()), 4)
     self.assertEqual(len(InlineDataBank.get_data('phone-number')), 2)
     self.assertIsNone(InlineDataBank.get_data('text'))
Exemple #2
0
    def get_clickables(cls, dom, prev_dom=None):
        # only return newly discovered clickables and forms, i.e. clickables not in prev_clickables
        prev_clickables = []
        prev_forms = []
        if prev_dom:
            prev_soup = BeautifulSoup(prev_dom, 'html.parser')
            for tag in cls._clickable_tags:
                if tag.get_attr():
                    for attr, value in tag.get_attr().items():
                        prev_clickables += prev_soup.find_all(
                            tag.get_name(), attrs={attr: value})
                else:
                    prev_clickables += prev_soup.find_all(tag.get_name())
            prev_forms = prev_soup.find_all('form')
        soup = BeautifulSoup(dom, 'html.parser')
        forms = soup.find_all('form')
        clickables = []
        # clickables with forms and inputs attached
        for form in forms:
            if form in prev_forms:
                continue
            form_id = form.get('id')
            if not form_id:
                form_id = cls.serial_prefix + str(cls._serial_num)
                cls._serial_num += 1
            f = FormField(form_id, cls._get_xpath(form))
            for input_type in cls.input_types:
                inputs = form.find_all('input', attrs={'type': input_type})
                for my_input in inputs:
                    data_set = InlineDataBank.get_data(input_type)
                    if data_set:
                        value = random.choice(list(data_set))
                    else:
                        value = ''.join(
                            random.choice(string.lowercase) for i in xrange(8))
                    input_id = my_input.get('id')
                    if not input_id:
                        input_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    f.add_input(
                        InputField(input_id, cls._get_xpath(my_input),
                                   input_type, value))
            for tag in cls._clickable_tags:
                if tag.get_attr():
                    for attr, value in tag.get_attr().items():
                        candidate_clickables = form.find_all(
                            tag.get_name(), attrs={attr: value})
                else:
                    candidate_clickables = form.find_all(tag.get_name())
                for candidate_clickable in candidate_clickables:
                    if candidate_clickable in prev_clickables:
                        continue
                    clickable_id = candidate_clickable.get('id')
                    if not clickable_id:
                        clickable_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    c = Clickable(clickable_id,
                                  cls._get_xpath(candidate_clickable),
                                  tag.get_name())
                    c.add_form(f)
                    clickables.append(c)

        # other clickables
        for tag in cls._clickable_tags:
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    candidate_clickables = soup.find_all(tag.get_name(),
                                                         attrs={attr: value})
            else:
                candidate_clickables = soup.find_all(tag.get_name())
            for candidate_clickable in candidate_clickables:
                #print candidate_clickable
                if candidate_clickable in prev_clickables:
                    continue
                if not cls._is_duplicate(clickables, candidate_clickable):
                    clickable_id = candidate_clickable.get('id')
                    if not clickable_id:
                        clickable_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    clickables.append(
                        Clickable(clickable_id,
                                  cls._get_xpath(candidate_clickable),
                                  tag.get_name()))
        return clickables
Exemple #3
0
    def get_clickables(cls, dom, prev_dom=None):
        # only return newly discovered clickables and forms, i.e. clickables not in prev_clickables
        prev_clickables = []
        prev_forms = []
        if prev_dom:
            prev_soup = BeautifulSoup(prev_dom, 'html.parser')
            for tag in cls._clickable_tags:
                if tag.get_attr():
                    for attr, value in tag.get_attr().items():
                        prev_clickables += prev_soup.find_all(tag.get_name(), attrs={attr: value})
                else:
                    prev_clickables += prev_soup.find_all(tag.get_name())
            prev_forms = prev_soup.find_all('form')
        soup = BeautifulSoup(dom, 'html.parser')
        forms = soup.find_all('form')
        clickables = []
        # clickables with forms and inputs attached
        for form in forms:
            if form in prev_forms:
                continue
            form_id = form.get('id')
            if not form_id:
                form_id = cls.serial_prefix + str(cls._serial_num)
                cls._serial_num += 1
            f = FormField(form_id, cls._get_xpath(form))
            for input_type in cls.input_types:
                inputs = form.find_all('input', attrs={'type': input_type})
                for my_input in inputs:
                    data_set = InlineDataBank.get_data(input_type)
                    if data_set:
                        value = random.choice(list(data_set))
                    else:
                        value = ''.join(random.choice(string.lowercase) for i in xrange(8))
                    input_id = my_input.get('id')
                    if not input_id:
                        input_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    f.add_input(InputField(input_id, cls._get_xpath(my_input), input_type, value))
            for tag in cls._clickable_tags:
                if tag.get_attr():
                    for attr, value in tag.get_attr().items():
                        candidate_clickables = form.find_all(tag.get_name(), attrs={attr: value})
                else:
                    candidate_clickables = form.find_all(tag.get_name())
                for candidate_clickable in candidate_clickables:
                    if candidate_clickable in prev_clickables:
                        continue
                    clickable_id = candidate_clickable.get('id')
                    if not clickable_id:
                        clickable_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    c = Clickable(clickable_id, cls._get_xpath(candidate_clickable), tag.get_name())
                    c.add_form(f)
                    clickables.append(c)

        # other clickables
        for tag in cls._clickable_tags:
            if tag.get_attr():
                for attr, value in tag.get_attr().items():
                    candidate_clickables = soup.find_all(tag.get_name(), attrs={attr: value})
            else:
                candidate_clickables = soup.find_all(tag.get_name())
            for candidate_clickable in candidate_clickables:
                #print candidate_clickable
                if candidate_clickable in prev_clickables:
                        continue
                if not cls._is_duplicate(clickables, candidate_clickable):
                    clickable_id = candidate_clickable.get('id')
                    if not clickable_id:
                        clickable_id = cls.serial_prefix + str(cls._serial_num)
                        cls._serial_num += 1
                    clickables.append(Clickable(clickable_id, cls._get_xpath(candidate_clickable), tag.get_name()))
        return clickables