Beispiel #1
0
    def check_js(self, doc: BeautifulSoup):
        """Check that every <script> tag loads a local file from a ``js`` folder.

        For each ``script`` element in *doc*:

        * a tag without a ``src`` attribute is reported (error code 17);
        * a ``src`` containing an absolute ``http(s)://`` URL is accepted
          as-is;
        * any other ``src`` must have ``js`` as one of its ``/``-separated
          path segments, otherwise it is reported (error code 17).

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        for tag in doc.select('script'):
            path = tag.get('src')
            if path is None:
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = 'script 標籤內沒有設定 src  tag: {}'.format(tag)
                error_data = et.get_error_data(17, msg, 1, reduct_point)

                self.errors.append(error_data)
                continue

            regex_check = re.findall(
                r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\), ]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                path)
            # re.findall returns a list, so test its truthiness.  The previous
            # comparison against the integer 0 was always true, which skipped
            # the folder check below for every script.
            if regex_check:
                continue

            if 'js' not in path.split('/'):
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = 'javascript 未寫在 js 資料夾中。 path: {}'.format(path)
                error_data = et.get_error_data(17, msg, 1, reduct_point)

                self.errors.append(error_data)
Beispiel #2
0
    def check_img_display(self, url: str, doc: bs4.BeautifulSoup):
        """Request every <img> in *doc* and report those that cannot be loaded.

        Each ``src`` is resolved against *url* and fetched with
        ``requests.get``.  A request that raises is reported as unreadable;
        a response whose status is neither 200 nor 403 is reported as a
        status error.  Both use error code 10, set ``self.result`` to False,
        add ``self.POINT`` to ``self.minus`` and append to ``self.errors``.
        """
        for img in doc.find_all('img'):
            path = img.get('src')
            target = urljoin(url, path)

            try:
                response = requests.get(target)
            except Exception:
                # Any failure to perform the request counts as an unreadable image.
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '圖片完全無法讀取。 img_path: {}'.format(path)
                self.errors.append(et.get_error_data(10, msg, 1, reduct_point))
                continue

            status = response.status_code
            if status in (200, 403):
                continue

            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = '圖片顯示狀態錯誤。 status: {} , img-path: {}'.format(
                status, path)
            self.errors.append(et.get_error_data(10, msg, 1, reduct_point))
Beispiel #3
0
    def check_doc_type(self):
        """Report an error (code 2) when the page has no HTML5 doctype.

        Looks for ``<!DOCTYPE html>`` anywhere in ``self.html``.  The match is
        case-insensitive because the HTML syntax accepts the doctype in any
        letter case (``<!doctype html>`` is equally valid).

        On failure sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        if '<!doctype html>' not in self.html.lower():
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = '<!DOCTYPE> 標籤錯誤'
            error_data = et.get_error_data(2, msg, 1, reduct_point)

            self.errors.append(error_data)
Beispiel #4
0
    def check_scroll_bar(self, browser: webdriver):
        """Report an error (code 8) when the page body is wider than 1280.

        The width is read from the live page via
        ``document.body.scrollWidth``.  A value above 1280 is treated as a
        horizontal scroll bar being present.
        NOTE(review): 1280 is presumably the viewport width the browser is
        opened with -- confirm against the caller.
        """
        page_width = browser.execute_script("return document.body.scrollWidth")
        if not page_width > 1280:
            return

        self.result = False

        reduct_point = self.POINT * 1
        self.minus += reduct_point
        msg = '網頁出現橫向卷軸。 網頁寬度: {}'.format(page_width)
        self.errors.append(et.get_error_data(8, msg, 1, reduct_point))
Beispiel #5
0
    def check_title(self, doc: BeautifulSoup):
        """Check that the page has a non-blank <title> inside <head>.

        Reports error code 5 when no ``head title`` element exists, or when
        the first one contains no visible text.  A title made only of
        whitespace counts as blank.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        matches = doc.select('head title')

        if not matches:
            self.result = False
            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'title 標籤未建立'
            error_data = et.get_error_data(5, msg, 1, reduct_point)
            self.errors.append(error_data)
            return

        # ``.text`` is always a string, so only emptiness needs testing;
        # strip first so a whitespace-only title is not accepted.
        title = matches[0].text.strip()
        if not title:
            self.result = False
            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'title 標籤錯誤'
            error_data = et.get_error_data(5, msg, 1, reduct_point)
            self.errors.append(error_data)
Beispiel #6
0
    def check_ul_tag(self, doc: BeautifulSoup):
        """Report an error (code 24) for every element flagged by ``get_wrong_ul``.

        ``self.get_wrong_ul`` is called with *doc* and an empty list;
        presumably it appends the offending elements to that list (its
        implementation is not visible here).  Each collected element costs
        ``self.POINT`` and is recorded with its own and its parent's tag name.
        """
        flagged = []
        self.get_wrong_ul(doc, flagged)

        for el in flagged:
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point

            # Collapse the element onto one line for the report.
            one_line = str(el).replace('\n', '')
            msg = 'ul tag 使用錯誤, 錯誤元素: {} , 當前標籤: {} , 父標籤: {}'.format(
                one_line, el.name, el.parent.name)
            self.errors.append(et.get_error_data(24, msg, 1, reduct_point))
Beispiel #7
0
    def check_access(self) -> bool:
        """Fetch ``self.url`` and store the response in ``self.res``.

        Returns:
            True when the request completes; False when it raises, in which
            case ``self.result`` is set to False, 100 is added to
            ``self.minus`` and an error record (code 1) is appended to
            ``self.errors``.
        """
        try:
            self.res = requests.get(self.url)
        except Exception as e:
            self.result = False
            self.minus += 100
            msg = '無法讀取作業網頁。 url: {} -> error: {}'.format(self.url, e)
            self.errors.append(et.get_error_data(1, msg, 1, 100))
            return False

        return True
Beispiel #8
0
    def check_response_status(self) -> bool:
        """Tell whether the stored response ``self.res`` has HTTP status 200.

        Returns:
            True for status 200.  For any other status, sets ``self.result``
            to False, adds 100 to ``self.minus``, appends an error record
            (code 1) to ``self.errors`` and returns False.
        """
        status = self.res.status_code
        if status == 200:
            return True

        self.result = False
        self.minus += 100
        msg = '網站不存在,或路徑錯誤。 status: {} , url: {}'.format(status, self.url)
        self.errors.append(et.get_error_data(1, msg, 1, 100))
        return False
Beispiel #9
0
    def check_img_setting(self, url: str, doc: BeautifulSoup):
        """Inspect the ``height``/``width`` attributes of images inside <body>.

        Only images that have a ``height`` attribute and whose ``width``
        starts with a percentage value are examined.  For those, both
        attributes must be pixel values; otherwise error code 19 is recorded.
        When both are pixel values the height/width ratio is compared with
        ``self.get_img_hwp`` for the resolved image URL and a mismatch is
        recorded with error code 18.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        px_size = r'[0-9]+\s*px'

        for tag in doc.select('body img'):
            abs_url = urljoin(url, tag.get('src'))
            height = tag.get('height')
            width = tag.get('width')

            if height is None or not re.match(r'[0-9]+%', str(width)):
                continue

            if not (re.match(px_size, str(height))
                    and re.match(px_size, str(width))):
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '圖片設定錯誤。 tag: {}'.format(tag)
                self.errors.append(et.get_error_data(19, msg, 1, reduct_point))
                continue

            # NOTE(review): width has already matched the percentage pattern
            # above, so it cannot also match the pixel pattern; the ratio
            # comparison below looks unreachable.  Confirm what the outer
            # condition was meant to be before relying on error code 18.
            h = int(height.replace('px', ''))
            w = int(width.replace('px', ''))
            hwp = round(h / w, 2)

            raw_hwp = self.get_img_hwp(abs_url)
            if hwp != raw_hwp:
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '圖片設定後的寬高比例與原圖比例不同。 tag: {}'.format(tag)
                self.errors.append(et.get_error_data(18, msg, 1, reduct_point))
Beispiel #10
0
    def check_css(self, doc: BeautifulSoup):
        """Check that every text/css stylesheet link points into a ``css`` folder.

        For each ``<link rel="stylesheet" type="text/css">`` in *doc*, the
        ``href`` must have ``css`` as one of its ``/``-separated segments;
        otherwise error code 16 is recorded, ``self.result`` is set to False
        and ``self.POINT`` is added to ``self.minus``.

        Links without an ``href`` attribute are skipped: there is no path to
        inspect, and calling ``split`` on the missing value would raise.
        """
        for tag in doc.select('link[rel = "stylesheet"][type = "text/css"]'):
            path = tag.get('href')
            if path is None:
                continue

            if 'css' in path.split('/'):
                continue

            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'css未放入css資料夾中。 path: {}'.format(path)
            error_data = et.get_error_data(16, msg, 1, reduct_point)

            self.errors.append(error_data)
Beispiel #11
0
    def check_li_tag(self, doc: BeautifulSoup):
        """Report an error (code 26) for every <li> whose parent is not <ul>/<ol>.

        Each offending item sets ``self.result`` to False, adds
        ``self.POINT`` to ``self.minus`` and appends a record holding the
        element (flattened to one line) and its parent's tag name.
        """
        for item in doc.select('li'):
            parent_tag = item.parent.name
            if parent_tag in ['ul', 'ol']:
                continue

            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            one_line = str(item).replace('\n', '')
            msg = 'li標籤單獨使用, 錯誤元素: {} , 父標籤: {}'.format(
                one_line, parent_tag)
            self.errors.append(et.get_error_data(26, msg, 1, reduct_point))
Beispiel #12
0
    def check_lang(self, doc: BeautifulSoup):
        """Check that <html> declares ``lang="zh-TW"``.

        Reports error code 4 when no ``html[lang]`` element exists, or when
        the first one's ``lang`` value is not ``zh-TW``.  The comparison is
        case-insensitive, so every casing of the tag is accepted rather than
        only a fixed list of spellings.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        matches = doc.select('html[lang]')
        if not matches:
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'lang 屬性未建立'
            error_data = et.get_error_data(4, msg, 1, reduct_point)

            self.errors.append(error_data)

            return

        lang = matches[0].get('lang')
        if lang.lower() != 'zh-tw':
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'lang 屬性錯誤。 tag: {}'.format(lang)
            error_data = et.get_error_data(4, msg, 1, reduct_point)

            self.errors.append(error_data)
Beispiel #13
0
    def check_charset(self, doc: BeautifulSoup):
        """Check that the page declares a UTF-8 ``<meta charset>``.

        Reports error code 3 when no ``meta[charset]`` element exists, or
        when the first one's value is neither ``utf-8`` nor ``utf8``.  The
        comparison is case-insensitive, so every casing is accepted rather
        than only a fixed list of spellings.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        matches = doc.select('meta[charset]')
        if not matches:
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'charset 標籤未建立'
            error_data = et.get_error_data(3, msg, 1, reduct_point)

            self.errors.append(error_data)
            return

        charset = matches[0].get('charset')
        if charset.lower() not in ('utf-8', 'utf8'):
            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point
            msg = 'charset 標籤錯誤。 tag: {}'.format(charset)
            error_data = et.get_error_data(3, msg, 1, reduct_point)

            self.errors.append(error_data)
Beispiel #14
0
    def content_is_empty(self) -> bool:
        """Check that the page text reaches ``self.text_low_limit`` characters.

        The text of ``self.doc`` is stripped at both ends and has its space
        characters removed before being counted.

        Returns:
            True when the count is at least ``self.text_low_limit`` (the page
            has enough content).  Otherwise sets ``self.result`` to False,
            adds 100 to ``self.minus``, appends an error record (code 1) to
            ``self.errors`` and returns False.
        """
        condensed = self.doc.text.strip().replace(' ', '')
        text_count = len(condensed)

        if not text_count < self.text_low_limit:
            return True

        self.result = False
        self.minus += 100
        msg = '網頁內容趨近空白 url: {} , 字數: {}'.format(self.url, text_count)
        self.errors.append(et.get_error_data(1, msg, 1, 100))
        return False
Beispiel #15
0
    def check_html_parse(self) -> bool:
        """Decode the stored response as UTF-8 and parse it into ``self.doc``.

        Forces ``self.res.encoding`` to ``utf-8``, stores the decoded text in
        ``self.html`` and parses it with the ``html.parser`` backend.

        Returns:
            True when parsing succeeds; False when it raises, in which case
            ``self.result`` is set to False, 100 is added to ``self.minus``
            and an error record (code 1) is appended to ``self.errors``.
        """
        self.res.encoding = 'utf-8'
        self.html = self.res.text

        try:
            self.doc = BeautifulSoup(self.html, 'html.parser')
        except Exception as e:
            self.result = False
            self.minus += 100
            msg = '作業網頁無法解析。 url: {} -> error: {}'.format(self.url, e)
            self.errors.append(et.get_error_data(1, msg, 1, 100))
            return False

        return True
Beispiel #16
0
    def check_gl_symbols(self, html: str, doc: BeautifulSoup):
        """Report an error (code 21) when ``<`` and ``>`` counts differ.

        The markup is first passed through ``PublicTool.escape_content_gls``
        (presumably to neutralise angle brackets that appear in text content;
        its implementation is not visible here).  The deduction is
        ``self.POINT`` multiplied by the difference between the two counts.
        """
        cleaned = PublicTool.escape_content_gls(html, doc)
        dev = abs(cleaned.count('<') - cleaned.count('>'))

        if dev == 0:
            return

        self.result = False

        reduct_point = self.POINT * dev
        self.minus += reduct_point
        msg = '<、> 符號數量錯誤。 相差: {}'.format(dev)
        self.errors.append(et.get_error_data(21, msg, 1, reduct_point))
Beispiel #17
0
    def check_file_path(self, url):
        """Report an error (code 11) when the page file name is badly formed.

        The file name is whatever follows the last ``/`` in *url*.  It is
        rejected when it contains a CJK ideograph (U+4E00-U+9FFF), an
        uppercase ASCII letter or any whitespace.

        On failure sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        file_name = url.rsplit('/', 1)[-1]

        if re.search(r'([\u4E00-\u9FFF]+|[A-Z]+|\s+)', file_name) is None:
            return

        self.result = False

        reduct_point = self.POINT * 1
        self.minus += reduct_point
        msg = '網頁檔名或是含有 "中文" 或 "大寫字母" 或 "空白字元" 。 path_string: {}'.format(
            file_name)
        self.errors.append(et.get_error_data(11, msg, 1, reduct_point))
Beispiel #18
0
    def check_head_body(self, doc: BeautifulSoup):
        """Report an error (code 7) for each non-metadata element inside <head>.

        Every direct child tag of the first ``<head>`` must be one of the
        metadata elements ``base``, ``link``, ``meta``, ``noscript``,
        ``script``, ``style``, ``template`` or ``title``; anything else is
        treated as page content placed outside ``<body>``.  Text nodes and
        comments are ignored.

        A document without a ``<head>`` has nothing to inspect, so the method
        returns without reporting instead of raising ``IndexError``.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        heads = doc.select('head')
        if not heads:
            return

        allowed_in_head = (
            'base', 'link', 'meta', 'noscript', 'script', 'style',
            'template', 'title',
        )

        for tag in heads[0].children:
            # Children also include strings and comments; only tags matter.
            if not isinstance(tag, bs4.element.Tag):
                continue

            if tag.name not in allowed_in_head:
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '網頁內容不在 <body></body> 區間內'
                error_data = et.get_error_data(7, msg, 1, reduct_point)

                self.errors.append(error_data)
Beispiel #19
0
    def check_tags(self, html: str, doc: BeautifulSoup):
        """Report an error (code 20) for tags whose open/close counts differ.

        For every distinct tag name found in *doc* (except the names in the
        skip list below), the opening forms ``<name `` and ``<name>`` and the
        closing form ``</name>`` are counted in the markup returned by
        ``PublicTool.escape_content_gls``.  A mismatch costs ``self.POINT``
        multiplied by the difference.
        """
        # Names that are never expected to have a matching closing tag here.
        skipped = [
            'area', 'base', 'br', 'embed', 'hr', 'iframe', 'img', 'input',
            'link', 'meta', 'param', 'source', 'track', 'address', 'aside'
        ]
        names = {el.name for el in doc.find_all()}

        cleaned = PublicTool.escape_content_gls(html, doc)

        for tag in names:
            if tag in skipped:
                continue

            normal_start = '<' + tag + '>'
            normal_end = '</' + tag + '>'

            # Opening tags carrying attributes look like "<name " in the markup.
            with_attrs = re.findall(r'<' + tag + ' ', cleaned)
            start_count = len(with_attrs) + cleaned.count(normal_start)
            end_count = cleaned.count(normal_end)

            if start_count == end_count:
                continue

            dev = abs(start_count - end_count)
            if start_count > end_count:
                info = normal_start + ' 多於 ' + normal_end
            else:
                info = normal_end + ' 多於 ' + normal_start

            self.result = False

            reduct_point = self.POINT * dev
            self.minus += reduct_point
            msg = 'tag 的開始與結束有錯誤。 {} 相差數: {}'.format(info, dev)
            self.errors.append(et.get_error_data(20, msg, dev, reduct_point))
Beispiel #20
0
    def check_attr_space(self, html: str, doc: BeautifulSoup):
        """Report an error (code 22) for tags whose attributes are not space-separated.

        For every distinct tag name in *doc*, each opening tag with
        attributes is located in the raw *html* by regex.  ``href``/``src``
        attributes and the contents of all quoted values are removed from a
        working copy, then the remaining ``=`` signs are compared with the
        remaining spaces: fewer spaces than ``=`` signs is reported.

        Each report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        tag_list = [tag.name for tag in doc.find_all()]
        tag_list = list(set(tag_list))
        hp = r'href=\"([^"]*)\"'
        srcp = r'src=\"([^"]*)\"'

        for tag in tag_list:
            # Opening tags of this name that carry at least one attribute.
            pattern = r'<{1}' + tag + ' [^<,>]*>{1}'
            find_list = re.findall(pattern, html)

            for check_tag in find_list:
                temp_tag = check_tag
                # Drop href/src attributes together with their leading space,
                # presumably because URLs may themselves contain '=' or spaces.
                if bool(re.findall(hp, temp_tag)):
                    replace_p = r' href=\"([^"]*)\"'
                    temp_tag = re.sub(replace_p, '', temp_tag)

                if bool(re.findall(srcp, temp_tag)):
                    replace_p = r' src=\"([^"]*)\"'
                    temp_tag = re.sub(replace_p, '', temp_tag)

                # Collect the text inside every remaining pair of double quotes.
                req_p = r'\"([^"]*)\"'
                re_list = re.findall(req_p, temp_tag)

                # Blank out each quoted value so '=' or spaces inside values
                # are not counted below.
                for re_str in re_list:
                    re_str = re_str.replace('"', '')
                    temp_tag = temp_tag.replace(re_str, '')

                # One '=' per remaining attribute; each attribute is expected
                # to be preceded by a space.
                class_num = temp_tag.count('=')
                space_num = temp_tag.count(' ')

                if space_num < class_num:
                    self.result = False

                    reduct_point = self.POINT * 1
                    self.minus += reduct_point
                    msg = '元素屬性之間沒有用空白隔開 tag: {}'.format(check_tag)
                    error_data = et.get_error_data(22, msg, 1, reduct_point)

                    self.errors.append(error_data)
Beispiel #21
0
    def check_window_open(self, url: str, doc: BeautifulSoup):
        """Report an error (code 14) for external links that do not open a new window.

        Every element with an ``href`` inside <body> is examined.  ``tel:``
        links containing ``+`` and ``mailto:`` links containing ``@`` are
        ignored.  The remaining links are resolved against *url*; those that
        ``PublicTool.is_external_domain`` classifies as external must carry
        ``target="_blank"``.
        """
        for tag in doc.select('body [href]'):
            link = tag.get('href')

            is_phone = 'tel:' in link and '+' in link
            if is_phone:
                continue
            is_mail = 'mailto:' in link and '@' in link
            if is_mail:
                continue

            abs_url = urljoin(url, link)

            if not PublicTool.is_external_domain(url, abs_url):
                continue
            if tag.get('target') == '_blank':
                continue

            self.result = False

            reduct_point = self.POINT * 1
            self.minus += reduct_point

            msg = '連結到外部連結沒有新開視窗。 tag: {}'.format(tag)
            self.errors.append(et.get_error_data(14, msg, 1, reduct_point))
Beispiel #22
0
    def check_attr_quote(self, html: str, doc: BeautifulSoup):
        """Report an error (code 23) for tags whose attribute quotes look unbalanced.

        ``AttrQuoteCheck.get_tags_with_attr`` supplies raw tag strings taken
        from the markup returned by ``PublicTool.escape_content_gls``.  Each
        string is re-parsed; for every element found, the occurrences of its
        attribute names in the raw string are summed and the number of ``"``
        characters is expected to be exactly twice that sum.
        """
        cleaned = PublicTool.escape_content_gls(html, doc)
        tags_with_attr = AttrQuoteCheck.get_tags_with_attr(cleaned, doc)

        for raw_str in tags_with_attr:
            fragment = BeautifulSoup(raw_str, 'html.parser')

            for el in fragment.find_all():
                # Occurrences of each attribute name stand in for the number
                # of attributes written in the raw tag.
                expected_pairs = sum(
                    raw_str.count(str(attr_name)) for attr_name in el.attrs)

                if raw_str.count('"') == expected_pairs * 2:
                    continue

                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '屬性的 " " 符號沒有正確使用。 錯誤元素: {}'.format(raw_str)
                self.errors.append(et.get_error_data(23, msg, 1, reduct_point))
Beispiel #23
0
    def check_all_src_name(self, url: str, doc: BeautifulSoup):
        """Check the naming and location of locally hosted files and images.

        Link part: for every element with an ``href`` that is not a ``tel:``
        link containing ``+`` or a ``mailto:`` link containing ``@``, and that
        ``PublicTool.is_external_domain`` does not classify as external, the
        file name and each folder name in the path are checked (error
        code 13).

        Image part: for every ``<img>`` that is not external, the path must
        contain an ``images``, ``img`` or ``image`` segment (error code 15)
        and every path segment is checked (error code 12).  Images without a
        ``src`` attribute are skipped, since there is no path to inspect.

        A name fails the check when it contains a CJK ideograph
        (U+4E00-U+9FFF), an uppercase ASCII letter or any whitespace.  Each
        report sets ``self.result`` to False, adds ``self.POINT`` to
        ``self.minus`` and appends a record to ``self.errors``.
        """
        bad_name = r'([\u4E00-\u9FFF]+|[A-Z]+|\s+)'

        # file part
        for tag in doc.select('[href]'):
            link = tag.get('href')
            if 'tel:' in link and '+' in link:
                continue
            if 'mailto:' in link and '@' in link:
                continue

            abs_url = urljoin(url, link)
            if PublicTool.is_external_domain(url, abs_url):
                continue

            # file: the last path segment; the rest are folders
            path_list = link.split('/')
            file = path_list.pop()
            if re.search(bad_name, file):
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '上傳的檔案中,有的名稱含有 中文 或 大寫 或 空白字元。 file name: {}'.format(file)
                error_data = et.get_error_data(13, msg, 1, reduct_point)

                self.errors.append(error_data)

            # folder
            for folder in path_list:
                if re.search(bad_name, folder):
                    self.result = False

                    reduct_point = self.POINT * 1
                    self.minus += reduct_point
                    msg = '上傳的資料夾中,有的名稱含有 中文 or 大寫 or 空白字元。 folder name: {}'.format(folder)
                    error_data = et.get_error_data(13, msg, 1, reduct_point)

                    self.errors.append(error_data)

        # img part
        for img in doc.find_all('img'):
            path = img.get('src')
            if path is None:
                # No src attribute: nothing to split, and splitting None
                # would raise AttributeError.
                continue

            # Resolve against the same base that the external-domain test
            # uses, as the link part above does.
            abs_url = urljoin(url, path)
            if PublicTool.is_external_domain(url, abs_url):
                continue

            check_list = path.split('/')
            # Is the image stored in an image folder?
            if 'images' not in check_list and 'img' not in check_list and 'image' not in check_list:
                self.result = False

                reduct_point = self.POINT * 1
                self.minus += reduct_point
                msg = '圖片未放在圖片資料夾(images or img or image) 或是路徑設定錯誤。 tag: {}'.format(path)
                error_data = et.get_error_data(15, msg, 1, reduct_point)

                self.errors.append(error_data)

            # Check every path segment, including the file name.
            for sub_path in check_list:
                if re.search(bad_name, sub_path):
                    self.result = False

                    reduct_point = self.POINT * 1
                    self.minus += reduct_point
                    msg = '圖片檔名或是路徑含有中文/大寫/空白字元。 path_string: {}'.format(sub_path)
                    error_data = et.get_error_data(12, msg, 1, reduct_point)

                    self.errors.append(error_data)