def is_line_height_hidden(self, element):
    '''
    Tell whether the effective ``line-height`` of ``element`` is small enough
    to count as a text-hiding trick.

    The element's own ``line-height`` (as returned by
    ``self.htmlParser.get_element_style_attr_value``) is used when present;
    otherwise the first non-None value found among the elements returned by
    ``self.htmlParser.get_all_parent_element(element)`` is used.

    The value is reported as hidden when:
      * unit ``px``: value <= ``settings.getfloat('LINE_HEIGHT')``
      * unit ``em``: value * 10 <= ``settings.getfloat('LINE_HEIGHT')``
        (presumably a rough "1em is about 10px" conversion -- confirm)
      * unit ``%``:  value <= ``settings.getfloat('LINE_HEIGHT_PERCENT')``

    Fractional values such as ``0.5px`` or ``.5em`` are parsed as real
    numbers. Negative values, unit-less values and every other unit are
    never reported.

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helpers.
    :return: ``True`` if the line height is at or below the configured
        threshold, ``False`` otherwise.
    '''
    parser = self.htmlParser

    # 1. The element's own line-height takes precedence.
    raw_value = parser.get_element_style_attr_value(element, 'line-height')
    if raw_value is None:
        # 2. Otherwise use the first parent that declares a line-height.
        for ancestor in parser.get_all_parent_element(element):
            raw_value = parser.get_element_style_attr_value(ancestor, 'line-height')
            if raw_value is not None:
                break
    if raw_value is None:
        return False

    # Number (optional sign, optional fraction) followed by the unit. ``\D+``
    # is greedy, so the unit only compares equal below when nothing but the
    # unit follows the number.
    match = re.search(r'(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(\D+)', raw_value, re.IGNORECASE)
    if match is None:
        return False

    number = match.group(1)
    unit = match.group(2).lower()
    if number.startswith('-'):
        # Negative values have never been treated as a match here.
        return False

    value = float(number)
    if unit == 'px':
        return value <= settings.getfloat('LINE_HEIGHT')
    if unit == 'em':
        return value * 10 <= settings.getfloat('LINE_HEIGHT')
    if unit == '%':
        return value <= settings.getfloat('LINE_HEIGHT_PERCENT')
    return False
def is_font_size_hidden(self, element):
    '''
    Tell whether the effective ``font-size`` of ``element`` is small enough
    to count as a text-hiding trick.

    The element's own ``font-size`` (as returned by
    ``self.htmlParser.get_element_style_attr_value``) is used when present;
    otherwise the first non-None value found among the elements returned by
    ``self.htmlParser.get_all_parent_element(element)`` is used.

    The value is reported as hidden when:
      * unit ``px``: value <= ``settings.getfloat('FONT_SIZE')``
      * unit ``em``: value * 10 <= ``settings.getfloat('FONT_SIZE')``
        (presumably a rough "1em is about 10px" conversion -- confirm)
      * unit ``%``:  value <= ``settings.getfloat('FONT_SIZE_PERCENT')``

    Fractional values such as ``0.5px`` or ``.5em`` are parsed as real
    numbers. Negative values, unit-less values and every other unit are
    never reported.

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helpers.
    :return: ``True`` if the font size is at or below the configured
        threshold, ``False`` otherwise.
    '''
    parser = self.htmlParser

    # 1. The element's own font-size takes precedence.
    raw_value = parser.get_element_style_attr_value(element, 'font-size')
    if raw_value is None:
        # 2. Otherwise use the first parent that declares a font-size.
        for ancestor in parser.get_all_parent_element(element):
            raw_value = parser.get_element_style_attr_value(ancestor, 'font-size')
            if raw_value is not None:
                break
    if raw_value is None:
        return False

    # Number (optional sign, optional fraction) followed by the unit. ``\D+``
    # is greedy, so the unit only compares equal below when nothing but the
    # unit follows the number.
    match = re.search(r'(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(\D+)', raw_value, re.IGNORECASE)
    if match is None:
        return False

    number = match.group(1)
    unit = match.group(2).lower()
    if number.startswith('-'):
        # Negative values have never been treated as a match here.
        return False

    value = float(number)
    if unit == 'px':
        return value <= settings.getfloat('FONT_SIZE')
    if unit == 'em':
        return value * 10 <= settings.getfloat('FONT_SIZE')
    if unit == '%':
        return value <= settings.getfloat('FONT_SIZE_PERCENT')
    return False
def is_overflow_height_hidden(self, element):
    '''
    Tell whether a parent of ``element`` clips its content with
    ``overflow: hidden`` and a very small ``height``.

    Only the elements returned by
    ``self.htmlParser.get_all_parent_element(element)`` are examined (not
    ``element`` itself). The first one that declares both ``overflow`` and
    ``height`` decides the outcome; later ones are ignored.

    With ``overflow`` equal to ``hidden`` (case-insensitive) the height is
    reported as hiding when:
      * it has the form ``expression_r(N)`` or ``expression_r(A-B)`` and the
        integer result is <= ``settings.getfloat('OVER_HEIGHT')``;
      * unit ``px``: value <= ``OVER_HEIGHT``;
      * unit ``em``: value * 10 <= ``OVER_HEIGHT`` (presumably a rough
        "1em is about 10px" conversion -- confirm).

    Fractional ``px``/``em`` values are parsed as real numbers; negative
    ``px``/``em`` values and other units are never reported.

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helpers.
    :return: ``True`` if a clipping parent was found, ``False`` otherwise.
    '''
    parser = self.htmlParser
    overflow = None
    height = None

    # 1. Find the first parent that declares both overflow and height.
    for ancestor in parser.get_all_parent_element(element):
        found_overflow = parser.get_element_style_attr_value(ancestor, 'overflow')
        found_height = parser.get_element_style_attr_value(ancestor, 'height')
        if found_overflow is not None and found_height is not None:
            overflow = found_overflow
            height = found_height
            break

    if overflow is None or height is None:
        return False
    if overflow.lower() != 'hidden':
        return False

    # NOTE(review): the pattern looks for the literal text "expression_r(";
    # presumably this targets CSS expression() values -- confirm the spelling.
    expression = re.search(r'expression_r\(((\d+-)?\d+)\)', height, re.IGNORECASE)
    if expression is not None:
        # The captured text is "N" or "A-B" with digits only. It is computed
        # with int() instead of eval(): page content is never executed, and a
        # leading zero ("08") is read as decimal rather than raising
        # SyntaxError or being taken as octal.
        operands = [int(part) for part in expression.group(1).split('-')]
        if len(operands) == 2:
            result = operands[0] - operands[1]
        else:
            result = operands[0]
        return result <= settings.getfloat('OVER_HEIGHT')

    # Number (optional sign, optional fraction) followed by the unit. ``\D+``
    # is greedy, so the unit only compares equal below when nothing but the
    # unit follows the number.
    plain = re.search(r'(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(\D+)', height, re.IGNORECASE)
    if plain is None:
        return False

    number = plain.group(1)
    unit = plain.group(2).lower()
    if number.startswith('-'):
        # Negative heights have never been treated as a match here.
        return False

    value = float(number)
    if unit == 'px':
        return value <= settings.getfloat('OVER_HEIGHT')
    if unit == 'em':
        return value * 10 <= settings.getfloat('OVER_HEIGHT')
    return False
def is_text_indent_hidden(self, element):
    '''
    Tell whether the effective ``text-indent`` of ``element`` is at or below
    the configured threshold and therefore counts as a text-hiding trick.

    The element's own ``text-indent`` (as returned by
    ``self.htmlParser.get_element_style_attr_value``) is used when present;
    otherwise the first non-None value found among the elements returned by
    ``self.htmlParser.get_all_parent_element(element)`` is used.

    The value is reported as hidden when:
      * unit ``px``: value <= ``settings.getfloat('TEXT_INDENT')``
      * unit ``em``: value * 10 <= ``settings.getfloat('TEXT_INDENT')``
        (presumably a rough "1em is about 10px" conversion -- confirm)
      * unit ``%``:  value <= ``settings.getfloat('TEXT_INDENT_PERCENT')``

    Signed and fractional values are parsed as real numbers, so an indent
    such as ``-9999px`` is compared against the threshold instead of being
    discarded. Unit-less values and every other unit are never reported.

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helpers.
    :return: ``True`` if the indent is at or below the configured threshold,
        ``False`` otherwise.
    '''
    parser = self.htmlParser

    # 1. The element's own text-indent takes precedence.
    raw_value = parser.get_element_style_attr_value(element, 'text-indent')
    if raw_value is None:
        # 2. Otherwise use the first parent that declares a text-indent.
        for ancestor in parser.get_all_parent_element(element):
            raw_value = parser.get_element_style_attr_value(ancestor, 'text-indent')
            if raw_value is not None:
                break
    if raw_value is None:
        return False

    # Number (optional sign, optional fraction) followed by the unit. ``\D+``
    # is greedy, so the unit only compares equal below when nothing but the
    # unit follows the number.
    match = re.search(r'(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(\D+)', raw_value, re.IGNORECASE)
    if match is None:
        return False

    # float() keeps the sign; str.isdigit() would reject every negative
    # number and the "<=" comparisons below could then never see one.
    value = float(match.group(1))
    unit = match.group(2).lower()
    if unit == 'px':
        return value <= settings.getfloat('TEXT_INDENT')
    if unit == 'em':
        return value * 10 <= settings.getfloat('TEXT_INDENT')
    if unit == '%':
        return value <= settings.getfloat('TEXT_INDENT_PERCENT')
    return False
def is_marquee_value_hidden(self, element):
    '''
    Tell whether ``element`` sits inside a ``marquee`` tag whose attributes
    match the configured "tiny and fast" profile.

    Every element returned by
    ``self.htmlParser.get_all_parent_element(element)`` is checked in order.
    A parent matches when its tag is ``marquee``, its ``height``, ``width``
    and ``scrollamount`` attributes are all present and made of digits only,
    and:
      * 0 < height <= ``settings.getfloat('HEIGHT_MAX')``
      * 0 < width  <= ``settings.getfloat('WIDTH_MAX')``
      * scrollamount >= ``settings.getfloat('SCROLLAMOUNT_MIN')``

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helper.
    :return: ``True`` as soon as one parent matches, ``False`` when none does.
    '''
    for ancestor in self.htmlParser.get_all_parent_element(element):
        if ancestor.tag != 'marquee':
            continue

        box_height = ancestor.get('height')
        box_width = ancestor.get('width')
        scroll_step = ancestor.get('scrollamount')
        if box_height is None or box_width is None or scroll_step is None:
            continue

        # Only plain unsigned integers are considered; anything else
        # (units, signs, fractions) disqualifies this marquee.
        if not (box_height.isdigit() and box_width.isdigit() and scroll_step.isdigit()):
            continue

        if not 0 < float(box_height) <= settings.getfloat('HEIGHT_MAX'):
            continue
        if not 0 < float(box_width) <= settings.getfloat('WIDTH_MAX'):
            continue
        if float(scroll_step) >= settings.getfloat('SCROLLAMOUNT_MIN'):
            return True

    return False
def is_position_left_hidden(self, element):
    '''
    Tell whether ``element`` is positioned so far to the left that it counts
    as hidden.

    The element's own ``position`` and ``left`` are used when both are
    present. Otherwise the first element returned by
    ``self.htmlParser.get_all_parent_element(element)`` that declares both
    values is used.

    With ``position`` equal to ``absolute`` or ``fixed`` (case-insensitive)
    the ``left`` value is reported as hiding when:
      * it has the form ``expression_r(N)`` or ``expression_r(A-B)`` and the
        integer result is <= ``settings.getfloat('POSITION_LEFT')``;
      * unit ``px``: value <= ``POSITION_LEFT``;
      * unit ``em``: value * 10 <= ``POSITION_LEFT`` (presumably a rough
        "1em is about 10px" conversion -- confirm);
      * unit ``%``:  value <= ``settings.getfloat('POSITION_LEFT_PERCENT')``.

    Signed and fractional values are parsed as real numbers, so an offset
    such as ``-9999px`` is compared against the threshold instead of being
    discarded. Unit-less values and every other unit are never reported.

    :param element: element to inspect; it is only handed to the
        ``htmlParser`` helpers.
    :return: ``True`` if the offset is at or below the configured threshold,
        ``False`` otherwise.
    '''
    parser = self.htmlParser

    # 1. Use the element's own position/left when it declares both.
    position = parser.get_element_style_attr_value(element, 'position')
    left = parser.get_element_style_attr_value(element, 'left')
    if position is None or left is None:
        # 2. Otherwise use the first parent that declares both.
        position = None
        left = None
        for ancestor in parser.get_all_parent_element(element):
            found_position = parser.get_element_style_attr_value(ancestor, 'position')
            found_left = parser.get_element_style_attr_value(ancestor, 'left')
            if found_position is not None and found_left is not None:
                position = found_position
                left = found_left
                break

    if position is None or left is None:
        return False
    if position.lower() not in ('absolute', 'fixed'):
        return False

    # NOTE(review): the pattern looks for the literal text "expression_r(";
    # presumably this targets CSS expression() values -- confirm the spelling.
    expression = re.search(r'expression_r\(((\d+-)?\d+)\)', left, re.IGNORECASE)
    if expression is not None:
        # The captured text is "N" or "A-B" with digits only. It is computed
        # with int() instead of eval(): page content is never executed, and a
        # leading zero ("08") is read as decimal rather than raising
        # SyntaxError or being taken as octal.
        operands = [int(part) for part in expression.group(1).split('-')]
        if len(operands) == 2:
            result = operands[0] - operands[1]
        else:
            result = operands[0]
        return result <= settings.getfloat('POSITION_LEFT')

    # Number (optional sign, optional fraction) followed by the unit. ``\D+``
    # is greedy, so the unit only compares equal below when nothing but the
    # unit follows the number.
    plain = re.search(r'(-?(?:\d+(?:\.\d+)?|\.\d+))\s*(\D+)', left, re.IGNORECASE)
    if plain is None:
        return False

    # float() keeps the sign; str.isdigit() would reject every negative
    # number and the "<=" comparisons below could then never see one.
    value = float(plain.group(1))
    unit = plain.group(2).lower()
    if unit == 'px':
        return value <= settings.getfloat('POSITION_LEFT')
    if unit == 'em':
        return value * 10 <= settings.getfloat('POSITION_LEFT')
    if unit == '%':
        return value <= settings.getfloat('POSITION_LEFT_PERCENT')
    return False
def get_detect_html(self): ''' 描述: 通过浏览器获取当前页面的HTML内容 ''' try: import socket timeout = settings.getfloat('HTML_TIMEOUT') socket.setdefaulttimeout(timeout) except Exception, e: raise DarkException, _('Failed to import socket to set timeout. Exception: %(exception)s.' % {'exception': str(e)})