예제 #1
0
    def perform(self, document, sourceHTML, sourceURL, encoding=None, copy_style=False):
        """Inline the document's CSS into per-element ``style`` attributes.

        Linked stylesheets and ``<style>`` elements are gathered into a single
        CSS string and removed from ``document``; the computed declarations are
        then written to the ``style`` attribute of every styled element.  The
        serialized result is stored in ``self.convertedHTML``.

        Args:
            document: Parsed tree that is modified in place.
            sourceHTML: Not used by this method.
            sourceURL: URL the document was loaded from.  When non-empty,
                stylesheet ``href`` values are resolved against it.
            encoding: Passed to ``etree.tostring``.  With ``None`` the
                serializer's default byte output is decoded as ASCII.
            copy_style: When true, a ``<style>`` element built from
                ``self.used_rules`` is inserted at the top of ``<body>``.

        Returns:
            ``self``.

        Raises:
            IOError: If a linked stylesheet cannot be retrieved.
        """
        css_parts = []

        # Retrieve CSS rel links from html pasted and aggregate into one string
        CSSRelSelector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
        for element in CSSRelSelector.evaluate(document):
            href = element.get("href")
            if not href:
                # A link without an href points at nothing that can be fetched.
                continue
            try:
                # Resolve against the full page URL so its port and directory
                # are honoured; absolute hrefs pass through urljoin unchanged.
                csspath = urlparse.urljoin(sourceURL, href) if sourceURL else href
                css_parts.append(requests.get(csspath).text)
                element.getparent().remove(element)
            except Exception:
                raise IOError('The stylesheet ' + href + ' could not be found')

        # Include inline style elements
        CSSStyleSelector = CSSSelector("style,Style")
        for element in CSSStyleSelector.evaluate(document):
            if element.text:
                css_parts.append(element.text)
            element.getparent().remove(element)

        aggregate_css = "".join(css_parts)

        # Convert document to a style dictionary compatible with etree
        styledict = self.get_view(document, aggregate_css)

        # Set inline style attribute if not one of the elements not worth styling
        ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
        for element, style in styledict.items():
            if element.tag not in ignore_list:
                element.set('style', style.getCssText(separator=u''))

        if copy_style:
            sheet = cssutils.css.CSSStyleSheet()
            for rule in self.used_rules:
                sheet.add(rule)

            # cssText may be a byte string; make it text before concatenating.
            css_text = sheet.cssText
            if isinstance(css_text, bytes):
                css_text = css_text.decode('utf-8')
            style = etree.HTML("<style>" + css_text + "</style>")
            document.find('.//body').insert(0, style.find(".//style"))

        serialized = etree.tostring(document, method="xml", pretty_print=True, encoding=encoding)
        if isinstance(serialized, bytes):
            # encoding=None yields ASCII bytes; decoding with None would raise.
            serialized = serialized.decode(encoding or 'ascii')
        # Tedious raw conversion of line breaks.
        self.convertedHTML = serialized.replace('&#13;', '')
        return self
예제 #2
0
    def perform(self, document, sourceHTML, sourceURL, encoding=None):
        """Inline the document's CSS into per-element ``style`` attributes.

        Linked stylesheets (with their ``url(...)`` references rewritten through
        ``Conversion.replace_url``) and ``<style>`` elements are gathered into a
        single CSS string and removed from ``document``; the computed
        declarations are then written to the ``style`` attribute of every
        styled element.  The serialized result is stored in
        ``self.convertedHTML``.

        Args:
            document: Parsed tree that is modified in place.
            sourceHTML: Not used by this method.
            sourceURL: URL the document was loaded from.  When non-empty,
                stylesheet ``href`` values are resolved against it.
            encoding: Passed to ``etree.tostring``.  With ``None`` the
                serializer's default byte output is decoded as ASCII.

        Returns:
            ``self``.

        Raises:
            IOError: If a linked stylesheet cannot be retrieved or processed.
        """
        css_parts = []

        # Retrieve CSS rel links from html pasted and aggregate into one string
        CSSRelSelector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
        for element in CSSRelSelector.evaluate(document):
            href = element.get("href")
            if not href:
                # A link without an href points at nothing that can be fetched.
                continue
            try:
                # Resolve against the full page URL so its port and directory
                # are honoured; absolute hrefs pass through urljoin unchanged.
                csspath = urlparse.urljoin(sourceURL, href) if sourceURL else href
                css_content = requests.get(csspath).text

                # Replace relative urls inside the css file with absolute ones,
                # using the stylesheet's own location as the base.
                css_content = Conversion.REGEX_URL.sub(
                    lambda match_obj: Conversion.replace_url(csspath, match_obj),
                    css_content)

                css_parts.append(css_content)
                element.getparent().remove(element)
            except Exception:
                raise IOError('The stylesheet ' + href + ' could not be found')

        # Include inline style elements
        CSSStyleSelector = CSSSelector("style,Style")
        for element in CSSStyleSelector.evaluate(document):
            # An empty <style></style> has text None; skip it instead of failing.
            if element.text:
                css_parts.append(element.text)
            element.getparent().remove(element)

        aggregate_css = "".join(css_parts)

        # Convert document to a style dictionary compatible with etree
        styledict = self.get_view(document, aggregate_css)

        # Set inline style attribute if not one of the elements not worth styling
        ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
        for element, style in styledict.items():
            if element.tag not in ignore_list:
                element.set('style', style.getCssText(separator=u''))

        serialized = etree.tostring(document, method="xml", pretty_print=True, encoding=encoding)
        if isinstance(serialized, bytes):
            # encoding=None yields ASCII bytes; decoding with None would raise.
            serialized = serialized.decode(encoding or 'ascii')
        # Tedious raw conversion of line breaks.
        self.convertedHTML = serialized.replace('&#13;', '')
        return self
예제 #3
0
    def perform(self, document, sourceHTML, sourceURL, encoding='unicode', remove_origin=True):
        """Inline the document's CSS into per-element ``style`` attributes.

        Linked stylesheets and ``<style>`` elements are gathered into a single
        CSS string; the computed declarations are then written to the ``style``
        attribute of every styled element.  The HTML-serialized result is
        stored in ``self.convertedHTML``.

        Args:
            document: Parsed tree that is modified in place.
            sourceHTML: Not used by this method.
            sourceURL: URL the document was loaded from.  When non-empty,
                stylesheet ``href`` values are resolved against it.
            encoding: Passed to ``etree.tostring``.
            remove_origin: When true, the ``<link>`` and ``<style>`` elements
                that supplied the CSS are removed from ``document``.

        Returns:
            ``self``.

        Raises:
            IOError: If a linked stylesheet cannot be retrieved.
        """
        css_parts = []

        # Retrieve CSS rel links from html pasted and aggregate into one string
        CSSRelSelector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
        for element in CSSRelSelector.evaluate(document):
            href = element.get("href")
            if not href:
                # A link without an href points at nothing that can be fetched.
                continue
            try:
                # Resolve against the full page URL so its port and directory
                # are honoured; absolute hrefs pass through urljoin unchanged.
                csspath = urlparse.urljoin(sourceURL, href) if sourceURL else href

                # Get css file. Don't verify SSL certificates. It's just a css.
                # Cloudfront does not have correct SSL certificate and it fails.
                # NOTE(review): verify=False disables TLS verification for
                # every host, not only the one with the broken certificate.
                css_parts.append(requests.get(csspath, verify=False).text)

                if remove_origin:
                    element.getparent().remove(element)
            except Exception:
                raise IOError('The stylesheet ' + href + ' could not be found')

        # Include inline style elements
        CSSStyleSelector = CSSSelector("style,Style")
        for element in CSSStyleSelector.evaluate(document):
            # An empty <style></style> has text None; skip it instead of failing.
            if element.text:
                css_parts.append(element.text)

            if remove_origin:
                element.getparent().remove(element)

        aggregate_css = "".join(css_parts)

        # Convert document to a style dictionary compatible with etree
        styledict = self.get_view(document, aggregate_css)

        # Set inline style attribute if not one of the elements not worth styling
        ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
        for element, style in styledict.items():
            if element.tag not in ignore_list:
                element.set('style', style.getCssText(separator=''))

        self.convertedHTML = etree.tostring(document, method="html", pretty_print=True, encoding=encoding)
        return self
예제 #4
0
    def perform(self,document,sourceHTML,sourceURL):
        """Inline the document's CSS into per-element ``style`` attributes.

        Linked stylesheets and ``<style>`` elements are gathered into a single
        CSS string and removed from ``document``; the computed declarations are
        then written to the ``style`` attribute of every styled element.  The
        UTF-8 serialized result is decoded and stored in
        ``self.convertedHTML``.

        Args:
            document: Parsed tree that is modified in place.
            sourceHTML: Not used by this method.
            sourceURL: URL the document was loaded from.  When non-empty,
                stylesheet ``href`` values are resolved against it.

        Returns:
            ``self``.

        Raises:
            IOError: If a linked stylesheet cannot be retrieved.
        """
        css_parts = []

        # retrieve CSS rel links from html pasted and aggregate into one string
        CSSRelSelector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
        for element in CSSRelSelector.evaluate(document):
            href = element.get("href")
            if not href:
                # A link without an href points at nothing that can be fetched.
                continue
            try:
                # Resolve against the full page URL so its port and directory
                # are honoured; absolute hrefs pass through urljoin unchanged.
                csspath = urlparse.urljoin(sourceURL, href) if sourceURL else href
                f = urlopen(csspath)
                try:
                    css_content = f.read()
                finally:
                    # Always release the connection, even if the read fails.
                    f.close()
                if not isinstance(css_content, str):
                    # read() returned bytes; turn them into text before joining.
                    css_content = css_content.decode('utf-8', 'replace')
                css_parts.append(css_content)
                element.getparent().remove(element)
            except Exception:
                raise IOError('The stylesheet '+href+' could not be found')

        # include inline style elements
        CSSStyleSelector = CSSSelector("style,Style")
        for element in CSSStyleSelector.evaluate(document):
            # An empty <style></style> has text None; skip it instead of failing.
            if element.text:
                css_parts.append(element.text)
            element.getparent().remove(element)

        aggregateCSS = "".join(css_parts)

        # convert document to a style dictionary compatible with etree
        styledict = self.getView(document, aggregateCSS)

        # set inline style attribute if not one of the elements not worth styling
        ignoreList = ['html','head','title','meta','link','script']
        for element, style in styledict.items():
            if element.tag not in ignoreList:
                element.set('style', style.getCssText(separator=u''))

        # convert tree back to plain text html
        serialized = etree.tostring(document, method="xml", pretty_print=True,encoding='UTF-8')
        # tedious raw conversion of line breaks.
        self.convertedHTML = serialized.decode('utf-8').replace('&#13;', '')

        return self
예제 #5
0
    def perform(self, document, sourceHTML, sourceURL, encoding='unicode'):
        """Inline the document's CSS into per-element ``style`` attributes.

        Linked stylesheets (fetched over HTTP, or read from the local
        filesystem when the resolved path does not start with ``http``) and
        ``<style>`` elements are gathered into a single CSS string and removed
        from ``document``; the computed declarations are then written to the
        ``style`` attribute of every styled element.  The serialized result is
        stored in ``self.convertedHTML``.

        Args:
            document: Parsed tree that is modified in place.
            sourceHTML: Not used by this method.
            sourceURL: Location the document was loaded from.  When non-empty,
                stylesheet ``href`` values are resolved against it.
            encoding: Passed to ``etree.tostring``.

        Returns:
            ``self``.

        Raises:
            IOError: If a linked stylesheet cannot be retrieved.
        """
        css_parts = []

        # Retrieve CSS rel links from html pasted and aggregate into one string
        CSSRelSelector = CSSSelector(
            "link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
        for element in CSSRelSelector.evaluate(document):
            href = element.get("href")
            if not href:
                # A link without an href points at nothing that can be fetched.
                continue
            try:
                # Resolve against the full source location so its port and
                # directory are honoured; absolute hrefs pass through unchanged.
                csspath = urlparse.urljoin(sourceURL, href) if sourceURL else href
                if csspath.startswith("http"):
                    css_content = requests.get(csspath).text
                else:
                    # Local stylesheet: read it and close the handle promptly.
                    with open(csspath, "r") as css_file:
                        css_content = css_file.read()
                css_parts.append(css_content)

                element.getparent().remove(element)
            except Exception:
                raise IOError('The stylesheet ' + href +
                              ' could not be found')

        # Include inline style elements
        CSSStyleSelector = CSSSelector("style,Style")
        for element in CSSStyleSelector.evaluate(document):
            # An empty <style></style> has text None; skip it instead of failing.
            if element.text:
                css_parts.append(element.text)
            element.getparent().remove(element)

        aggregate_css = "".join(css_parts)

        # Convert document to a style dictionary compatible with etree
        styledict = self.get_view(document, aggregate_css)

        # Set inline style attribute if not one of the elements not worth styling
        ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
        for element, style in styledict.items():
            if element.tag not in ignore_list:
                element.set('style', style.getCssText(separator=''))

        self.convertedHTML = etree.tostring(document,
                                            method="xml",
                                            pretty_print=True,
                                            encoding=encoding)
        return self
예제 #6
0
    def get_view(self, document, css):
        """Compute the cascaded style declaration for every matched element.

        Each style rule in ``css`` is matched against ``document``; for every
        matching element the rule's properties are merged into one declaration,
        seeded with the element's existing inline ``style`` attribute.  When
        two declarations for the same property have equal priority, the one
        with equal or higher selector specificity wins; a declaration carrying
        a priority also replaces one whose priority differs.

        Side effects:
            * ``self.CSSUnsupportErrors`` gains, per property, the clients that
              ``css_compliance.csv`` (next to this module) marks "N" or "P".
            * ``self.CSSErrors`` collects messages of selectors that raised
              ``ExpressionError``.
            * ``self.supportPercentage`` is set when any failure was counted.

        Args:
            document: Tree the selectors are evaluated against.
            css: Stylesheet text, parsed with ``cssutils.parseString``.

        Returns:
            dict mapping each matched element to its
            ``cssutils.css.CSSStyleDeclaration``.
        """
        view = {}
        specificities = {}
        supportratios = {}
        compliance = {}

        # Column count of the last row; stays 0 when the CSV has no data rows.
        client_count = 0
        csv_path = os.path.join(os.path.dirname(__file__), "css_compliance.csv")
        with open(csv_path) as csv_file:
            for row in csv.DictReader(csv_file, delimiter=','):
                # Count clients so we can calculate an overall support percentage later
                client_count = len(row)
                compliance[row['property'].strip()] = dict(row)

        # Decrement client count to account for first col which is property name
        if client_count:
            client_count -= 1

        sheet = cssutils.parseString(css)

        for rule in sheet:
            if rule.type != rule.STYLE_RULE:
                continue

            for selector in rule.selectorList:
                try:
                    matching = CSSSelector(selector.selectorText).evaluate(document)
                except ExpressionError as exc:
                    # Record each distinct selector problem once and move on.
                    message = str(exc)
                    if message not in self.CSSErrors:
                        self.CSSErrors.append(message)
                    continue

                for element in matching:
                    # add styles for all matching DOM elements
                    if element not in view:
                        # add initial
                        view[element] = cssutils.css.CSSStyleDeclaration()
                        specificities[element] = {}

                        # add inline style if present
                        inlinestyletext = element.get('style')
                        if inlinestyletext:
                            inlinestyle = cssutils.css.CSSStyleDeclaration(cssText=inlinestyletext)
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    declared = view[element]
                    for p in rule.style:
                        ratio = supportratios.setdefault(
                            p.name, {'usage': 0, 'failedClients': 0})
                        ratio['usage'] += 1

                        # Only properties not yet reported are checked; ones
                        # missing from the compliance table are skipped.
                        if p.name not in self.CSSUnsupportErrors and p.name in compliance:
                            for client, support in compliance[p.name].items():
                                if support == "N" or support == "P":
                                    # Increment client failure count for this property
                                    ratio['failedClients'] += 1
                                    if support == "P":
                                        label = client + ' (partial support)'
                                    else:
                                        label = client
                                    self.CSSUnsupportErrors.setdefault(p.name, []).append(label)

                        # update styles
                        if p not in declared:
                            declared.setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            continue

                        sameprio = (p.priority == declared.getPropertyPriority(p.name))
                        if (not sameprio and bool(p.priority)) or (
                                sameprio and selector.specificity >= specificities[element][p.name]):
                            # later, more specific or higher prio
                            declared.setProperty(p.name, p.value, p.priority)
                            # Track the winner's specificity so a later, less
                            # specific rule cannot override it.
                            specificities[element][p.name] = selector.specificity

        support_failrate = 0
        support_totalrate = 0
        for propvals in supportratios.values():
            support_failrate += propvals['usage'] * int(propvals['failedClients'])
            support_totalrate += int(propvals['usage']) * client_count

        if support_failrate and support_totalrate:
            self.supportPercentage = 100 - ((float(support_failrate) / float(support_totalrate)) * 100)
        return view
예제 #7
0
    def get_view(self, document, css):
        """Compute the cascaded style declaration for every matched element.

        Each style rule in ``css`` is matched against ``document``; for every
        matching element the rule's properties are merged into one declaration,
        seeded with the element's existing inline ``style`` attribute.  When
        two declarations for the same property have equal priority, the one
        with equal or higher selector specificity wins; a declaration carrying
        a priority also replaces one whose priority differs.

        Side effects:
            * ``self.CSSUnsupportErrors`` gains, per property, the clients that
              ``css_compliance.csv`` (next to this module) marks "N" or "P".
            * ``self.CSSErrors`` collects messages of selectors that raised
              ``ExpressionError``.
            * ``self.supportPercentage`` is set when any failure was counted.

        Args:
            document: Tree the selectors are evaluated against.
            css: Stylesheet text, parsed with ``cssutils.parseString``.

        Returns:
            dict mapping each matched element to its
            ``cssutils.css.CSSStyleDeclaration``.
        """
        view = {}
        specificities = {}
        supportratios = {}
        compliance = {}

        with open(os.path.join(os.path.dirname(__file__),
                               "css_compliance.csv")) as csv_file:
            compat_list = csv_file.readlines()

        # Column count of the last row; stays 0 when the CSV has no data rows.
        client_count = 0
        for row in csv.DictReader(compat_list, delimiter=str(',')):
            # Count clients so we can calculate an overall support percentage later
            client_count = len(row)
            compliance[row['property'].strip()] = dict(row)

        # Decrement client count to account for first col which is property name
        if client_count:
            client_count -= 1

        sheet = cssutils.parseString(css)

        for rule in sheet:
            if rule.type != rule.STYLE_RULE:
                continue

            for selector in rule.selectorList:
                try:
                    matching = CSSSelector(
                        selector.selectorText).evaluate(document)
                except ExpressionError as exc:
                    # Record each distinct selector problem once and move on.
                    message = str(exc)
                    if message not in self.CSSErrors:
                        self.CSSErrors.append(message)
                    continue

                for element in matching:
                    # add styles for all matching DOM elements
                    if element not in view:
                        # add initial
                        view[element] = cssutils.css.CSSStyleDeclaration()
                        specificities[element] = {}

                        # add inline style if present
                        inlinestyletext = element.get('style')
                        if inlinestyletext:
                            inlinestyle = cssutils.css.CSSStyleDeclaration(
                                cssText=inlinestyletext)
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    declared = view[element]
                    known = specificities[element]
                    for p in rule.style:
                        ratio = supportratios.setdefault(p.name, {
                            'usage': 0,
                            'failedClients': 0
                        })
                        ratio['usage'] += 1

                        # Only properties not yet reported are checked; ones
                        # missing from the compliance table are skipped.
                        if (p.name not in self.CSSUnsupportErrors
                                and p.name in compliance):
                            for client, support in compliance[p.name].items():
                                if support == "N" or support == "P":
                                    # Increment client failure count for this property
                                    ratio['failedClients'] += 1
                                    if support == "P":
                                        label = client + ' (partial support)'
                                    else:
                                        label = client
                                    self.CSSUnsupportErrors.setdefault(
                                        p.name, []).append(label)

                        # update styles
                        if p not in declared:
                            declared.setProperty(p.name, p.value, p.priority)
                            known[p.name] = selector.specificity
                            continue

                        sameprio = (
                            p.priority == declared.getPropertyPriority(p.name))
                        if (not sameprio and bool(p.priority)) or (
                                sameprio
                                and selector.specificity >= known[p.name]):
                            # later, more specific or higher prio
                            declared.setProperty(p.name, p.value, p.priority)
                            # Track the winner's specificity so a later, less
                            # specific rule cannot override it.
                            known[p.name] = selector.specificity

        support_failrate = 0
        support_totalrate = 0
        for propvals in supportratios.values():
            support_failrate += propvals['usage'] * int(
                propvals['failedClients'])
            support_totalrate += int(propvals['usage']) * client_count

        if support_failrate and support_totalrate:
            self.supportPercentage = 100 - (
                (float(support_failrate) / float(support_totalrate)) * 100)
        return view