def test_children(self):
        """CSSStyleDeclaration.children()

        Each child yielded by ``children()`` must have the expected type and
        reference its declaration through ``parent``; properties added
        afterwards must reference the declaration as well.
        """
        css = '/*1*/color: red; color: green; @x;'
        expected_types = (
            cssutils.css.CSSComment,
            cssutils.css.Property,
            cssutils.css.Property,
            cssutils.css.CSSUnknownRule,
        )

        def check(declaration):
            for position, child in enumerate(declaration.children()):
                self.assertEqual(expected_types[position], type(child))
                self.assertEqual(child.parent, declaration)

        # Stand-alone declaration
        check(cssutils.parseStyle(css))
        # Declaration owned by a style rule
        style_rule = cssutils.parseString('a {' + css + '}').cssRules[0]
        check(style_rule.style)
        # Declaration owned by a style rule nested inside @media
        media_rule = cssutils.parseString('@media all {a {' + css +
                                          '}}').cssRules[0]
        check(media_rule.cssRules[0].style)

        # Properties set after parsing are parented too
        declaration = cssutils.parseStyle(css)
        declaration['x'] = '0'
        self.assertEqual(declaration, declaration.getProperty('x').parent)
        declaration.setProperty('y', '1')
        self.assertEqual(declaration, declaration.getProperty('y').parent)
Esempio n. 2
0
    def collect_font_stats(self):
        """Collect, per embedded font file, the characters rendered with it.

        Reads the @font-face rules and the font usage reported by the page's
        ``window.font_stats`` JavaScript object, keeps only the rules whose
        ``src`` is a ``file://`` URI naming a file present in the container,
        and adds the characters used with each matching rule to
        ``self.font_stats`` (keyed by container name).

        Raises:
            Exception: if either JavaScript call does not yield a list.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        reported_rules = self.page.bridge_value
        if not isinstance(reported_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        usable = []
        scheme = 'file://'
        for face in reported_rules:
            families = face.get('font-family', None)
            if not families:
                continue
            parsed = parseStyle('font-family:%s'%families, validate=False)
            families = [
                value.value
                for value in parsed.getProperty('font-family').propertyValue]
            if not families or families[0] == 'inherit':
                continue
            face['font-family'] = frozenset(icu_lower(fam) for fam in families)

            location = face.get('src', None)
            if not location:
                continue
            # Parse the src as if it were a background-image to get at the URI
            parsed = parseStyle('background-image:%s'%location, validate=False)
            location = parsed.getProperty('background-image').propertyValue[0].uri
            if not location.startswith(scheme):
                self.log.warn('Unknown URI in @font-face: %r'%location)
                continue

            # Turn the file URI into a native filesystem path
            location = location[len(scheme):]
            if iswindows and location.startswith('/'):
                location = location[1:]
            location = unquote(location.replace('/', os.sep))

            font_name = self.container.abspath_to_name(location)
            if not self.container.has_name(font_name):
                self.log.warn('Font %r referenced in @font-face rule not found'
                              %font_name)
                continue
            face['src'] = font_name
            normalize_font_properties(face)
            face['width'] = widths[face['font-stretch']]
            face['weight'] = int(face['font-weight'])
            usable.append(face)

        if not usable:
            return

        # Make sure every referenced font has an entry, even if unused
        for face in usable:
            if face['src'] not in self.font_stats:
                self.font_stats[face['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        usage = self.page.bridge_value
        if not isinstance(usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        ignored = {'\n', '\r', '\t'}
        for entry in usage:
            chars = set()
            for chunk in entry['text']:
                chars.update(chunk)
            chars -= ignored
            if not chars:
                continue
            for face in get_matching_rules(usable, entry):
                self.font_stats[face['src']] |= chars
Esempio n. 3
0
def set_style_property(tag, property_name, value, editor):
    '''
    Set a CSS property inside the style attribute of the tag.

    When the tag has no style attribute, a new one holding only the given
    property is inserted at the end of the opening tag. Otherwise the
    existing attribute value is re-parsed, updated and replaced in place.
    If the existing attribute is never closed, an error dialog is shown and
    its result is returned.
    '''
    def flattened(declaration):
        # Keep the serialized declaration on a single line
        return declaration.cssText.replace('\n', ' ')

    block, offset = find_attribute_in_tag(tag.start_block, tag.start_offset + 1, 'style')
    cursor = editor.textCursor()

    if block is None or offset is None:
        # No style attribute yet: append a fresh one to the opening tag
        declaration = parseStyle('')
        declaration.setProperty(property_name, value)
        cursor.setPosition(tag.end_block.position() + tag.end_offset)
        cursor.insertText(' style="%s"' % flattened(declaration))
        return

    # Select the existing attribute value, including its quotes
    cursor.setPosition(block.position() + offset - 1)
    end_block, end_offset = find_end_of_attribute(block, offset + 1)
    if end_block is None:
        return error_dialog(editor, _('Invalid markup'), _(
            'The current block tag has an existing unclosed style attribute. Run the Fix HTML'
            ' tool first.'), show=True)
    cursor.setPosition(end_block.position() + end_offset, cursor.KeepAnchor)
    quoted_value = editor.selected_text_from_cursor(cursor)
    declaration = parseStyle(quoted_value[1:-1])
    declaration.setProperty(property_name, value)
    cursor.insertText('"%s"' % flattened(declaration))
Esempio n. 4
0
def set_style_property(tag, property_name, value, editor):
    '''
    Set a style property, i.e. a CSS property inside the style attribute of the tag.

    An existing style attribute is parsed, updated and rewritten in place;
    if there is none, a new style attribute is inserted at the end of the
    opening tag. An unclosed style attribute is reported through an error
    dialog, whose result is returned.
    '''
    def serialize(style):
        # Newlines would break up the attribute value, so use spaces instead
        return style.cssText.replace('\n', ' ')

    block, offset = find_attribute_in_tag(tag.start_block, tag.start_offset + 1, 'style')
    cursor = editor.textCursor()
    has_style_attribute = block is not None and offset is not None

    if has_style_attribute:
        cursor.setPosition(block.position() + offset - 1)
        end_block, end_offset = find_end_of_attribute(block, offset + 1)
        if end_block is None:
            return error_dialog(editor, _('Invalid markup'), _(
                'The current block tag has an existing unclosed style attribute. Run the Fix HTML'
                ' tool first.'), show=True)
        # Extend the selection over the whole quoted value
        cursor.setPosition(end_block.position() + end_offset, cursor.KeepAnchor)
        current_css = editor.selected_text_from_cursor(cursor)[1:-1]
        style = parseStyle(current_css)
        style.setProperty(property_name, value)
        cursor.insertText('"%s"' % serialize(style))
    else:
        style = parseStyle('')
        style.setProperty(property_name, value)
        cursor.setPosition(tag.end_block.position() + tag.end_offset)
        cursor.insertText(' style="%s"' % serialize(style))
    def test_children(self):
        """CSSStyleDeclaration.children()

        Checks the type and ``parent`` of every child and, for properties,
        that the deprecated ``parentStyle`` attribute references the
        declaration too.
        """
        css = u'/*1*/color: red; color: green; @x;'
        # (expected type, DEPRECATED attribute that must also reference the
        # declaration, or None when there is no such attribute)
        expected = [
            (cssutils.css.CSSComment, None),
            (cssutils.css.Property, 'parentStyle'),
            (cssutils.css.Property, 'parentStyle'),
            (cssutils.css.CSSUnknownRule, None),
        ]

        def verify(declaration):
            for index, child in enumerate(declaration.children()):
                child_type, deprecated_attr = expected[index]
                self.assertEqual(child_type, type(child))
                self.assertEqual(declaration, child.parent)
                if deprecated_attr:
                    #DEPRECATED
                    self.assertEqual(declaration,
                                     getattr(child, deprecated_attr))

        # Built lazily so each declaration is parsed right before its check
        sources = (
            lambda: cssutils.parseStyle(css),
            lambda: cssutils.parseString(u'a {' + css + '}').cssRules[0].style,
            lambda: cssutils.parseString(
                u'@media all {a {' + css + '}}').cssRules[0].cssRules[0].style,
        )
        for make_declaration in sources:
            verify(make_declaration())

        declaration = cssutils.parseStyle(css)
        declaration['x'] = '0'
        self.assertEqual(declaration, declaration.getProperty('x').parent)
        declaration.setProperty('y', '1')
        self.assertEqual(declaration, declaration.getProperty('y').parent)
Esempio n. 6
0
 def test_border_condensation(self):
     # condense_rule() must merge border-<edge>-<prop> longhands into the
     # shortest equivalent shorthand declarations.
     vals = "red solid 5px"
     green_vals = vals.replace("red", "green")

     def longhand(edges, values):
         # One "border-<edge>-<prop>: <value>" declaration per edge/property
         declarations = []
         for edge in edges:
             for prop, value in zip(BORDER_PROPS, values.split()):
                 declarations.append("border-%s-%s: %s" % (edge, prop, value))
         return "; ".join(declarations)

     # Same values on every edge in EDGES: only "border" must remain
     style = parseStyle(longhand(EDGES, vals))
     condense_rule(style)
     for edge, prop in product(EDGES, BORDER_PROPS):
         self.assertFalse(style.getProperty("border-%s-%s" % (edge, prop)))
         self.assertFalse(style.getProperty("border-%s" % edge))
         self.assertFalse(style.getProperty("border-%s" % prop))
     self.assertEqual(style.getProperty("border").value, vals)

     # Only the top edge: condensed to "border-top"
     css = longhand(("top",), vals)
     style = parseStyle(css)
     condense_rule(style)
     self.assertEqual(style.cssText, "border-top: %s" % vals)

     # The remaining edges use another colour: four properties are kept
     css += ";" + longhand(("right", "left", "bottom"), green_vals)
     style = parseStyle(css)
     condense_rule(style)
     self.assertEqual(len(style.getProperties()), 4)
     self.assertEqual(style.getProperty("border-top").value, vals)
     self.assertEqual(style.getProperty("border-left").value, green_vals)
Esempio n. 7
0
 def test_border_condensation(self):
     # Exercises condense_rule() on border longhands: everything equal,
     # a single edge, and edges that differ in colour.
     vals = 'red solid 5px'
     recolored = vals.replace('red', 'green')

     def declarations(edges, values):
         # Pair each border property with its value once, reuse per edge
         pairs = list(zip(BORDER_PROPS, values.split()))
         return '; '.join(
             'border-%s-%s: %s' % (edge, prop, value)
             for edge in edges for prop, value in pairs)

     def condensed(css_text):
         # Parse the declarations and condense them in place
         parsed = parseStyle(css_text)
         condense_rule(parsed)
         return parsed

     style = condensed(declarations(EDGES, vals))
     for edge, prop in product(EDGES, BORDER_PROPS):
         for leftover in ('border-%s-%s' % (edge, prop),
                          'border-%s' % edge,
                          'border-%s' % prop):
             self.assertFalse(style.getProperty(leftover))
     self.assertEqual(style.getProperty('border').value, vals)

     css = declarations(('top', ), vals)
     style = condensed(css)
     self.assertEqual(style.cssText, 'border-top: %s' % vals)

     css += ';' + declarations(('right', 'left', 'bottom'), recolored)
     style = condensed(css)
     self.assertEqual(len(style.getProperties()), 4)
     self.assertEqual(style.getProperty('border-top').value, vals)
     self.assertEqual(style.getProperty('border-left').value, recolored)
 def test_children(self):
     """CSSStyleDeclaration.children()

     Children must have the expected types and reference the declaration
     through ``parent`` (and, for properties, through the deprecated
     ``parentStyle``).
     """
     css = u'/*1*/color: red; color: green; @x;'
     expected_types = (
         cssutils.css.CSSComment,
         cssutils.css.Property,
         cssutils.css.Property,
         cssutils.css.CSSUnknownRule,
     )
     # DEPRECATED back-reference checked per child position (None: skip)
     deprecated_attrs = (None, 'parentStyle', 'parentStyle', None)

     def check(declaration):
         for pos, child in enumerate(declaration.children()):
             self.assertEqual(expected_types[pos], type(child))
             self.assertEqual(declaration, child.parent)
             attr = deprecated_attrs[pos]
             if attr:
                 #DEPRECATED
                 self.assertEqual(declaration, getattr(child, attr))

     check(cssutils.parseStyle(css))
     in_rule = cssutils.parseString(u'a {'+css+'}')
     check(in_rule.cssRules[0].style)
     in_media = cssutils.parseString(u'@media all {a {'+css+'}}')
     check(in_media.cssRules[0].cssRules[0].style)

     declaration = cssutils.parseStyle(css)
     declaration['x'] = '0'
     self.assertEqual(declaration, declaration.getProperty('x').parent)
     declaration.setProperty('y', '1')
     self.assertEqual(declaration, declaration.getProperty('y').parent)
Esempio n. 9
0
    def abstract_diagram(self, tile, tileset=None):
        """Build the abstract SVG diagram element for ``tile``.

        Loads the ``<abase>-abstract.svg`` template bundled with the package,
        fills in the tile name, the fill colour of the tile rectangle and the
        orientation labels and, when ``tileset`` is given, the name and
        colour of every end of the tile.

        Args:
            tile: tile description; ``tile.name``, ``tile.ends`` and
                ``tile.get('color')`` are read.
            tileset: optional tile set whose ``ends`` are used to look up the
                definition (and colour) of each end.

        Returns:
            A ``(element, 1)`` tuple whose first item is the element with
            class ``tile`` taken from the template.
        """

        def resolve_fill(color):
            # Values starting with "#" are used verbatim; anything else is
            # looked up in xcolors. None means "no usable colour".
            if color is None:
                return None
            if color[:1] == "#":
                return color
            return xcolors.get(color, None)

        # pkg_resources resource names are always '/'-separated, whatever the
        # platform, so they must not be built with os.path.join.
        resource_name = 'seqdiagrambases/{}-abstract.svg'.format(self._abase)
        # Close the resource stream as soon as the template is parsed.
        with pkg_resources.resource_stream(__name__, resource_name) as stream:
            tilediagfile = etree.parse(stream)

        tilediag = tilediagfile.getroot().find("./*[@class='tile']")

        tilediag.find("./*[@class='tilename']").text = tile.name
        fill = resolve_fill(tile.get('color', None))
        if fill:
            tilerect = tilediag.find("./*[@class='tilerect']")
            s = cssutils.parseStyle(tilerect.attrib['style'])
            s['fill'] = fill
            tilerect.attrib['style'] = s.cssText
        if self._orient:
            tilediag.find("./*[@class='type_sw']").text = self._orient[0]
            tilediag.find("./*[@class='type_ne']").text = self._orient[1]

        if not tileset:
            return (tilediag, 1)

        for endn, loc in zip(tile.ends, self._a_endlocs):
            # An end name with a trailing '/' refers to the end named
            # without it.
            if endn in tileset.ends.keys():
                end = tileset.ends[endn]
            elif endn[:-1] in tileset['ends'].keys() and endn[-1] == '/':
                end = tileset['ends'][endn[:-1]]
            else:
                end = None
            tilediag.find("./*[@class='endname_{}']".format(loc)).text = endn
            if end and ('color' in end.keys()):
                ec = tilediag.find("./*[@class='endcolor_{}']".format(loc))
                s = cssutils.parseStyle(ec.attrib['style'])
                # NOTE(review): unlike the tile rectangle above, the fill is
                # assigned even when the colour is unknown (None); kept as
                # is - confirm whether that is intended.
                s['fill'] = resolve_fill(end.get('color', None))
                ec.attrib['style'] = s.getCssText('')

        return (tilediag, 1)
Esempio n. 10
0
    def test_parseStyle(self):
        """cssutils.parseStyle()

        Covers the returned type, rejection of at-rules, and decoding of
        byte input with an explicit encoding.
        """
        declaration = cssutils.parseStyle('x:0; y:red')
        self.assertEqual(type(declaration), cssutils.css.CSSStyleDeclaration)
        self.assertRaises(xml.dom.SyntaxErr, cssutils.parseStyle,
                          '@import "x";')

        # Encoded input must round-trip when its encoding is named
        for text, encoding in ((u'content: "ä"', 'iso-8859-1'),
                               (u'content: "€"', 'utf-8')):
            raw = text.encode(encoding)
            parsed = cssutils.parseStyle(raw, encoding=encoding)
            self.assertEqual(parsed.cssText, text)

        # UTF-8 bytes cannot be decoded as ASCII
        undecodable = u'content: "ä"'.encode('utf-8')
        self.assertRaises(UnicodeDecodeError, cssutils.parseStyle,
                          undecodable, 'ascii')
Esempio n. 11
0
    def collect_font_stats(self):
        """Collect, per embedded font, the characters rendered with it.

        Reads the @font-face rules and the font usage reported by the page's
        ``window.font_stats`` JavaScript object. Rules without a usable
        ``font-family`` or ``src`` are dropped; for the others ``src`` is
        replaced by the result of ``self.href_to_name`` and the characters
        used with each matching rule are added to ``self.font_stats``.

        Raises:
            Exception: if either JavaScript call does not yield a list.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        candidates = self.page.bridge_value
        if not isinstance(candidates, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for candidate in candidates:
            family_css = candidate.get('font-family', None)
            if not family_css:
                continue
            family_decl = parseStyle('font-family:%s' % family_css,
                                     validate=False)
            family_names = []
            for value in family_decl.getProperty('font-family').propertyValue:
                family_names.append(value.value)
            if not family_names or family_names[0] == 'inherit':
                continue
            candidate['font-family'] = frozenset(map(icu_lower, family_names))

            src_css = candidate.get('src', None)
            if not src_css:
                continue
            # Parse the src as if it were a background-image to get the URI
            src_decl = parseStyle('background-image:%s' % src_css,
                                  validate=False)
            uri = src_decl.getProperty('background-image').propertyValue[0].uri
            candidate['src'] = self.href_to_name(uri, '@font-face rule')
            normalize_font_properties(candidate)
            candidate['width'] = widths[candidate['font-stretch']]
            candidate['weight'] = int(candidate['font-weight'])
            rules.append(candidate)

        if not rules:
            return

        # Every referenced font gets an entry, even when nothing uses it
        for kept in rules:
            if kept['src'] not in self.font_stats:
                self.font_stats[kept['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        usages = self.page.bridge_value
        if not isinstance(usages, list):
            raise Exception('Unknown error occurred while reading font usage')
        whitespace = {'\n', '\r', '\t'}
        for usage in usages:
            characters = set()
            for piece in usage['text']:
                characters.update(piece)
            characters -= whitespace
            if not characters:
                continue
            for matched in get_matching_rules(rules, usage):
                self.font_stats[matched['src']] |= characters
Esempio n. 12
0
 def __find_types(self, table):
     """Record a [link text, background] pair for each linked cell of ``table``.

     Cells without an ``<a>`` element are ignored; the background is read
     from the cell's inline ``style`` attribute.
     """
     for cell in table.find_all('td'):
         link = cell.find('a')
         if not link:
             continue
         label = link.text
         background = cssutils.parseStyle(cell['style'])['background']
         self.update_data([[label, background]])
Esempio n. 13
0
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css='''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False

        # CSSStyleDeclaration
        style = cssutils.parseStyle(u'''color: red;
                                        background-image:
                                            url(1.png),
                                            url('2.png')''')
        cssutils.replaceUrls(style, lambda url: 'prefix/'+url)
        self.assertEqual(style.cssText, u'''color: red;
background-image: url(prefix/1.png), url(prefix/2.png)''')
Esempio n. 14
0
 def exceptresp():
     """Fill in description and thumbnail from the public video pages.

     Reads ``og:description`` and ``og:image`` from the video page. When no
     image is found, the ``background-image`` of the first ``<img>`` of the
     embed page is used instead. ``self`` comes from the enclosing scope.

     Returns:
         True when ``self._results`` was populated; False when the first div
         of the embed page has a class containing ``uiBoxRed`` (or no class
         at all), or when any step raised an exception.
     """
     try:
         #first method
         resp = requests.get(
             'https://www.facebook.com/video.php?v=' + self._video_id)
         textsoup = BeautifulSoup(resp.content, "html5lib")
         description = textsoup.find(
             "meta",  property="og:description").get("content", "")
         imgurl = textsoup.find(
             "meta",  property="og:image").get("content", "")
         if not imgurl:
             # second method: thumbnail of the embed page
             resp = requests.get(
                 'https://www.facebook.com/video/embed?video_id=' + self._video_id)
             textsoup = BeautifulSoup(resp.content, "html5lib")
             firstdiv = textsoup.body.find('div')
             firstimgstyle = textsoup.body.find('img').get('style')
             style = cssutils.parseStyle(firstimgstyle)
             imgurl = style['background-image']
             imgurl = imgurl.replace('url(', '').replace(')', '')
             if 'uiBoxRed' in firstdiv.get("class", "uiBoxRed"):
                 return False
         self._data["status"] = True
         self._results = {
             'title': "",
             'description': description,
             'duration': '',
             'status': True,
             'image': imgurl
             }
         return True
     except Exception:
         # Best effort: any scraping failure means "no data", but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         return False
Esempio n. 15
0
def background_image_finder(pipeline_index,
                            soup,
                            finder_image_urls=None,
                            *args, **kwargs):
    """
    Find image URL in background-image

    Example:
    <div style="width: 100%; height: 100%; background-image: url(http://distilleryimage10.ak.instagram.com/bde04558a43b11e28e5d22000a1f979a_7.jpg);" class="Image iLoaded iWithTransition Frame" src="http://distilleryimage10.ak.instagram.com/bde04558a43b11e28e5d22000a1f979a_7.jpg"></div>
    to
    http://distilleryimage10.ak.instagram.com/bde04558a43b11e28e5d22000a1f979a_7.jpg

    Args:
        pipeline_index: not used by this finder.
        soup: parsed document; every tag with a ``style`` attribute is
            inspected.
        finder_image_urls: URLs found by earlier finders (default: none).
            It is not modified.

    Returns:
        A dict whose ``finder_image_urls`` entry is a new list holding the
        given URLs followed by the newly found ones, without duplicates
        among the new ones.
    """
    # A None default avoids sharing one mutable list between calls.
    if finder_image_urls is None:
        finder_image_urls = []

    now_finder_image_urls = []

    for tag in soup.find_all(style=True):
        style_string = tag['style']
        # Cheap textual pre-check: skip styles that cannot contain the
        # property before paying for a CSS parse.
        if 'background-image' not in style_string.lower():
            continue
        style = cssutils.parseStyle(style_string)
        background_image = style.getProperty('background-image')
        if not background_image:
            continue
        for property_value in background_image.propertyValue:
            background_image_url = str(property_value.value)
            if (background_image_url
                    and background_image_url not in finder_image_urls
                    and background_image_url not in now_finder_image_urls):
                now_finder_image_urls.append(background_image_url)

    # list() lets callers pass any sequence, not only a list
    return {
        'finder_image_urls': list(finder_image_urls) + now_finder_image_urls,
    }
Esempio n. 16
0
    def _build_format(self, element):
        """Translate an element's ``class`` and ``style`` attributes into a Format.

        Non-empty class names are passed as ``style``. Inline CSS
        declarations whose name starts with ``<nested>-`` for one of
        ``Format.NESTED_STYLES`` are grouped under that nested name; the
        remaining declarations are renamed (dashes to underscores, then
        ``Format.FORMAT_ALIASES``) and kept only when listed in
        ``Format.optional``.
        """
        args = {}

        for attribute, value in element.attrs.items():
            if attribute == "class" and value:
                # Keep every non-empty class name (all of them, not only
                # the first one).
                class_names = [name for name in value if name]
                if class_names:
                    args["style"] = class_names
            elif attribute == "style":
                declarations = cssutils.parseStyle(value)

                # Each nested style starts out as an empty mapping
                args.update(
                    (nested, defaultdict(str)) for nested in Format.NESTED_STYLES)

                for declaration in declarations:
                    # First nested style whose "<name>-" prefix matches
                    owner = next(
                        (nested for nested in Format.NESTED_STYLES
                         if declaration.name.startswith(nested + "-")),
                        None)
                    if owner is not None:
                        sub_name = declaration.name.replace(owner + "-", "")
                        args[owner][sub_name] = declaration.value
                        continue

                    key = declaration.name.lower().replace("-", "_")
                    if key in Format.NESTED_STYLES:
                        # Not supported. Use explicit 'margin-right',
                        # 'margin-left' etc rather than just 'margin'.
                        continue
                    if key in Format.FORMAT_ALIASES:
                        key = Format.FORMAT_ALIASES[key]
                    if key in Format.optional:
                        args[key] = declaration.value.strip()
        return Format(**args)
Esempio n. 17
0
    def process_images_and_emojis(self, soup):
        """Localise the images and emoji sprite sheets referenced by ``soup``.

        Every ``<img>`` source that is not a ``data:image`` URI is cached via
        ``self.cache_file`` (site-relative paths are first prefixed with the
        notion.so host). For images with the ``notion-emoji`` class, the
        sprite sheet named in the inline ``background`` is cached and the
        style is rewritten to point at the cached copy.
        """
        # process images & emojis
        cache_images = True
        for img in soup.findAll("img"):
            if img.has_attr("src"):
                source = img["src"]
                # a path starting with / is one of notion's predefined images
                site_relative = source.startswith("/")
                if cache_images and "data:image" not in source:
                    if site_relative:
                        source = f'https://www.notion.so{img["src"]}'
                    img["src"] = self.cache_file(source)
                elif site_relative:
                    img["src"] = f'https://www.notion.so{img["src"]}'

            # on emoji images, cache their sprite sheet and re-set their background url
            if not (img.has_attr("class") and "notion-emoji" in img["class"]):
                continue
            style = cssutils.parseStyle(img["style"])
            spritesheet = style["background"]
            # the sprite sheet path is whatever sits between the parentheses
            url_start = spritesheet.find("(") + 1
            url_end = spritesheet.find(")")
            spritesheet_url = spritesheet[url_start:url_end]
            cached_spritesheet_url = self.cache_file(
                f"https://www.notion.so{spritesheet_url}"
            )
            style["background"] = spritesheet.replace(
                spritesheet_url, str(cached_spritesheet_url)
            )
            img["style"] = style.cssText
Esempio n. 18
0
    def clean_html(self, html):
        """Clean a unicode HTML fragment in a wrapping ``<div>``.

        The fragment is wrapped in a ``<div>``, cleaned by calling this
        object on the tree, stripped of all ``class`` attributes, and every
        ``style`` attribute is re-serialized by cssutils with only valid
        properties kept. Styles that fail to parse or end up empty are
        removed.

        Note: the serializer preferences are set on the global
        ``cssutils.ser`` object and stay in effect afterwards.

        Raises:
            ValueError: if ``html`` is not a unicode string.
        """
        if not isinstance(html, unicode):
            raise ValueError('We only support cleaning unicode HTML fragments')

        # We wrap the content up in an extra div tag (otherwise lxml does
        # weird things to it - like adding in <p> tags and stuff)
        divnode = fromstring(u'<div>' + html + u'</div>')
        self(divnode)

        # Strip all class attributes
        etree.strip_attributes(divnode, 'class')

        for style in divnode.xpath("//@style"):
            parent = style.getparent()
            try:
                cssStyle = cssutils.parseStyle(style)
            # "as" works on Python 2.6+ and 3; "except Exception, e" is a
            # syntax error on Python 3.
            except Exception as e:
                logging.info("Style %s failed to parse with error %s.",
                             style, e)
                parent.attrib.pop('style', None)
                continue

            # Set the line separator so that the style gets serialized
            cssutils.ser.prefs.lineSeparator = ''
            # Only allow valid style properties
            cssutils.ser.prefs.validOnly = True

            new_style = cssStyle.cssText
            if not new_style.strip():
                parent.attrib.pop('style', None)
            else:
                parent.attrib['style'] = new_style
Esempio n. 19
0
def to_ast(css):
    """
    Parse a CSS declaration block into our "AST".

    The "AST" is a frozenset holding the result of ``simplify`` for every
    child of the parsed declaration - per the original description, one
    (property, value, priority) tuple per CSS statement, with value strings
    normalized by cssutils.
    """
    declaration = cssutils.parseStyle(css)
    statements = (simplify(child) for child in declaration.children())
    return frozenset(statements)
Esempio n. 20
0
 def handle_html_content(self, content):
     """Rebuild every ``<p>`` so that each fifth word becomes a dork link.

     Coroutine (uses ``yield from``): when ``self.dorks`` runs empty it is
     refilled from ``self.get_dorks()``. Links keep the paragraph's text
     colour when its inline style defines one, black otherwise.

     Returns:
         The modified document encoded as UTF-8.
     """
     soup = BeautifulSoup(content, 'html.parser')
     for paragraph in soup.find_all('p'):
         css = None
         if 'style' in paragraph.attrs:
             css = cssutils.parseStyle(paragraph.attrs['style'])
         words = paragraph.text.split()
         rebuilt = soup.new_tag('p', style=css.cssText if css else None)
         for position, word in enumerate(words):
             if len(self.dorks) <= 0:
                 self.dorks = yield from self.get_dorks()
             chunk = word + ' '
             if position % 5 != 0:
                 # Plain word: copied over as text
                 rebuilt.append(soup.new_string(chunk))
                 continue
             # Every fifth word (starting with the first) is wrapped in a
             # link styled to look like ordinary text
             target = self.dorks.pop()
             link_style = 'color:{color};text-decoration:none;cursor:text;'.format(
                 color=css.color if css and 'color' in css.keys() else '#000000'
             )
             anchor = soup.new_tag('a', href=target, style=link_style)
             anchor.string = chunk
             rebuilt.append(anchor)
         paragraph.replace_with(rebuilt)
     content = soup.encode('utf-8')
     return content
    def test_parseStyle(self):
        """cssutils.parseStyle()

        Verifies the result type, that an at-rule is a syntax error, and
        that byte input is decoded with the encoding passed in.
        """
        parse = cssutils.parseStyle

        result = parse('x:0; y:red')
        self.assertEqual(type(result), cssutils.css.CSSStyleDeclaration)
        self.assertRaises(xml.dom.SyntaxErr, parse, '@import "x";')

        # (text, encoding used to produce the bytes handed to the parser)
        round_trips = [
            (u'content: "ä"', 'iso-8859-1'),
            (u'content: "€"', 'utf-8'),
        ]
        for expected_text, codec in round_trips:
            result = parse(expected_text.encode(codec), encoding=codec)
            self.assertEqual(result.cssText, expected_text)

        # Bytes outside ASCII must not decode when ASCII is requested
        utf8_bytes = u'content: "ä"'.encode('utf-8')
        self.assertRaises(UnicodeDecodeError, parse, utf8_bytes, 'ascii')
Esempio n. 22
0
def getPropertyLink(page_source, type_property, type_buy):
    """Append the property links and thumbnails of a result page to ``HousesLink``.

    For every ``div.srpBlockListRow`` the link in its ``p.proHeading`` is
    turned into an absolute magicbricks URL and the thumbnail URL is read
    from the ``background-image`` of its ``div.thumbnailBG``. One dict with
    ``PropertyUrl``, ``PropertyImg`` (empty when no thumbnail can be read)
    and ``BuyType`` is appended per row. Rows without any link are skipped.

    Args:
        page_source: HTML of the result page.
        type_property: not used by this function.
        type_buy: value stored as ``BuyType``.
    """
    soupProperty = BeautifulSoup(page_source, 'html5lib')
    divPropertyInfoContainer = soupProperty.find_all(
        'div', attrs={"class": "srpBlockListRow"})
    for info in divPropertyInfoContainer:
        # Reset per row: a row without heading/link must neither reuse the
        # URL of the previous row nor hit an unbound name on the first row.
        links = []
        propertyURL = None
        # As before, the links of the last heading win.
        for data in info.find_all('p', attrs={"class": "proHeading"}):
            links = data.find_all('a', href=True)
        for link in links:
            # print(x) with one argument behaves the same on Python 2 and 3
            print(link.get_text())
            print(link['href'])
            propertyURL = 'http://www.magicbricks.com' + link['href']
            print(propertyURL)
        if propertyURL is None:
            continue

        try:
            propertyImgUrlStyle = info.find('div',
                                            attrs={"class":
                                                   "thumbnailBG"})['style']
            style = cssutils.parseStyle(propertyImgUrlStyle)
            imgURL = style['background-image']
            imgURL = imgURL.replace('url(', '').replace(')', '')
            print(imgURL)
        except Exception:
            # Best effort: a missing or unparsable thumbnail is recorded as
            # an empty image URL.
            imgURL = ''
        HousesLink.append({
            "PropertyUrl": propertyURL,
            "PropertyImg": imgURL,
            "BuyType": type_buy
        })
Esempio n. 23
0
def _extract_colors(html: str) -> List[swatch.RgbColor]:
    """Extract the swatch colours of the theme panel found in ``html``.

    Every element whose class starts with ``Swatch__swatch__`` inside the
    element whose class starts with ``Colorwheel__themepanel__`` contributes
    one unnamed colour, built from the first three integers found in its
    inline ``background`` style.

    Raises:
        SwatchException: if anything goes wrong while extracting the
            colours; the original error is attached as the cause.
    """
    try:
        result = []

        soup = BeautifulSoup(html, "html.parser")
        theme_panel_tag = soup.find(
            class_=re.compile("^Colorwheel__themepanel___*"))
        swatch_tags = theme_panel_tag.find_all(
            class_=re.compile("^Swatch__swatch___*"))

        for swatch_tag in swatch_tags:
            # Extracting CSS background color (RGB mode)
            css_rgb_color = cssutils.parseStyle(swatch_tag["style"]).background

            # Extracting RGB color ingredients
            rgb = [int(i) for i in re.findall(r"\d{1,3}", css_rgb_color)]

            # Creating RGB color
            result.append(
                swatch.RgbColor(name='', red=rgb[0], green=rgb[1],
                                blue=rgb[2]))

        return result
    except Exception as exc:
        # A bare "except:" would also turn KeyboardInterrupt/SystemExit into
        # SwatchException; keep the original error as the cause.
        raise SwatchException from exc
Esempio n. 24
0
    def get_player_data(self, url, initial_data=None):
        """Scrape a player's profile page into a dict.

        Args:
            url: address of the profile page, loaded through ``self.driver``.
            initial_data: optional dict that is filled in place and returned
                instead of a fresh one.

        Returns:
            The player dict on success, or ``{'error': <message>}`` when any
            step raises (the message is printed as well).
        """
        player_data = {} if initial_data is None else initial_data

        try:
            self.driver.get(url)
            page = BeautifulSoup(self.driver.page_source, 'html.parser')

            def text_of(css_class):
                # Text of the first element carrying the given class
                return page.find(class_=css_class).get_text()

            player_data['first_name'] = text_of('firstName')
            player_data['last_name'] = text_of('lastName')
            player_data['position'] = text_of('playerPosition')

            rows = page.find(class_='profiletable').find_all(class_='row')

            def value_of(row_index):
                # The value is the text of the second <div> of a profile row
                return rows[row_index].find_all('div')[1].get_text()

            player_data['name'] = value_of(0)
            flag_text = rows[1].find('nationality-flags').find('span').get_text()
            player_data['nationality'] = flag_text.strip()
            player_data['birth_date'] = value_of(2).replace('.', '-')
            player_data['height'] = value_of(4).replace(' cm', '')

            inline_style = page.find(class_='playerImage')['style']
            background = cssutils.parseStyle(inline_style)['background-image']
            player_data['image_url'] = background.replace('url(', '').replace(')', '')

            if self.verbose:
                print('Scraped player:', player_data['name'])
            return player_data
        except Exception as ex:
            error = str(ex)
            print(error)
            return {'error': error}
Esempio n. 25
0
    def parse(self, response):
        """Check the inline styles of the selected tags and write the matches.

        For every ``style`` attribute selected by ``tag`` the value of
        ``css_property`` is compared with ``css_value`` according to
        ``condition`` (``if_equal``, ``if_lees_than`` or ``if_more_than``).
        The matches are written as JSON to the file named by ``output``.
        ``tag``, ``css_property``, ``css_value``, ``condition`` and
        ``output`` come from the enclosing scope.
        """
        result = {"results": []}

        selector = '{tag}::attr(style)'.format(tag=tag)
        for inline_style in response.css(selector).getall():
            declarations = dict(cssutils.parseStyle(inline_style))
            if css_property not in declarations:
                continue
            property_value = declarations[css_property]

            if condition == "if_equal":
                wanted = css_value
                # Compare numerically when both sides are plain digits
                if str(css_value).isdigit() and str(property_value).isdigit():
                    property_value = float(property_value)
                    wanted = float(css_value)
                matched = property_value == wanted
            elif condition == "if_lees_than":
                matched = float(property_value) < float(css_value)
            elif condition == "if_more_than":
                matched = float(property_value) > float(css_value)
            else:
                matched = False

            if matched:
                entry = {condition: "{}:{}".format(css_property, property_value)}
                result["results"].append(entry)

        with open("{output}".format(output=output), "w+") as output_file:
            output_file.write(json.dumps(result))
Esempio n. 26
0
 def test_edge_condensation(self):
     # margin-<edge>/padding-<edge> longhands must be condensed by
     # condense_rule() into the expected shorthand value; no shorthand is
     # expected when only three edges are given.
     cases = {
         (1, 1, 3): None,
         (1, 2, 3, 4): '2pt 3pt 4pt 1pt',
         (1, 2, 3, 2): '2pt 3pt 2pt 1pt',
         (1, 2, 1, 3): '2pt 1pt 3pt',
         (1, 2, 1, 2): '2pt 1pt',
         (1, 1, 1, 1): '1pt',
         ('2%', '2%', '2%', '2%'): '2%',
         tuple('0 0 0 0'.split()): '0',
     }
     edge_order = ('left', 'top', 'right', 'bottom')

     def as_length(size):
         # Bare numbers are points; strings are used verbatim
         if isinstance(size, (int, float)):
             return str(size) + 'pt'
         return size

     for sizes, expected in cases.items():
         for prefix in ('margin', 'padding'):
             longhand = {}
             for edge, size in zip(edge_order, sizes):
                 longhand['%s-%s' % (prefix, edge)] = as_length(size)
             css = '; '.join('%s:%s' % item for item in longhand.items())
             style = parseStyle(css)
             condense_rule(style)
             actual = getattr(style.getProperty(prefix), 'value', None)
             self.assertEqual(expected, actual)
             if actual is None:
                 continue
             # Once condensed, no per-edge longhand may be left behind
             for edge in EDGES:
                 leftover = style.getProperty('%s-%s' % (prefix, edge))
                 self.assertFalse(getattr(leftover, 'value', None))
Esempio n. 27
0
def mars_featured_image():
    """Scrape the featured Mars image URL from the JPL space-images page.

    Opens a browser via ``init_browser``, clicks the "full image" button,
    and reads the ``background-image`` declared in the inline style of the
    first ``<article>`` element.

    Returns:
        The image URL: ``https://www.jpl.nasa.gov`` followed by the path
        taken from the ``url(...)`` value.
    """
    # Initializing the browser by calling init_browser
    browser = init_browser()
    try:
        # Visit the JPL space-images page filtered to the Mars category
        url_to_scrape = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url_to_scrape)
        # Time delay of 2 secs to make sure the browser loads
        time.sleep(2)
        # Find the button "full image" and instantiate button click
        button = browser.find_by_id("full_image")
        button.click()
        # Scrape page into Soup
        html = browser.html
        # Create BeautifulSoup object; parsed with 'html.parser'
        soup = BeautifulSoup(html, 'html.parser')
        # Scrape the background image url
        url = cssutils.parseStyle(
            soup.find('article')['style'])['background-image']
        # Remove the url( ... ) wrapper, leaving only the path
        url = url.replace('url(', '').replace(')', '')
        # Define the base_url
        base_url = 'https://www.jpl.nasa.gov'
        # Create the url for the background image
        image_url = base_url + url
        # Return scraped item
        return image_url
    finally:
        # Always close the browser, even when a scraping step raised;
        # otherwise every failed run leaves a browser process behind.
        browser.quit()
Esempio n. 28
0
    def scrape_item_details(self, divs):
        """Build a section record from a pair of page elements.

        ``divs[0]`` supplies the section name (stripped text of its first
        ``span``); ``divs[1]`` contains the exhibition cards.

        Returns:
            ``{"section": <name>, "exhibitions": [<card dict>, ...]}`` where
            each card dict has the keys ``id`` (SHA-256 hex digest of the
            image URL), ``payment_type``, ``title``, ``date``,
            ``description`` and ``img``.
        """
        def text_of(card, css_class):
            # Stripped text of the first descendant carrying css_class.
            return card.find(class_=css_class).text.strip()

        section = divs[0].find("span").text.strip()
        records = []
        for card in divs[1].find_all(class_="card exhibition-card"):
            payment_type = text_of(card, "exhibition-payment-type")
            title = text_of(card, "exhibition-title d-flex flex-column")
            date = text_of(card, "exhibition-date")
            description = text_of(card, "exhibition-description")

            # The card image is only available as an inline background-image.
            inline_style = card.find(class_="w-100").find("div")["style"]
            background = cssutils.parseStyle(inline_style)["background-image"]
            img = background.replace("url(", "").replace(")", "")
            img_url = f"https://www.nationalgallery.org.uk/{img}"

            record = {
                "id": sha256(img_url.encode()).hexdigest(),
                "payment_type": payment_type,
                "title": title,
                "date": date,
                "description": description,
                "img": img_url,
            }
            records.append(record)

        return {"section": section, "exhibitions": records}
Esempio n. 29
0
    def test_replaceUrls(self):
        "cssutils.replaceUrls()"
        cssutils.ser.prefs.keepAllProperties = True

        css = '''
        @import "im1";
        @import url(im2);
        a {
            background-image: url(c) !important;
            background-\image: url(b);
            background: url(a) no-repeat !important;
            }'''
        s = cssutils.parseString(css)
        cssutils.replaceUrls(s, lambda old: "NEW" + old)
        self.assertEqual(u'@import "NEWim1";', s.cssRules[0].cssText)
        self.assertEqual(u'NEWim2', s.cssRules[1].href)
        self.assertEqual(
            u'''background-image: url(NEWc) !important;
background-\\image: url(NEWb);
background: url(NEWa) no-repeat !important''', s.cssRules[2].style.cssText)

        cssutils.ser.prefs.keepAllProperties = False

        # CSSStyleDeclaration
        style = cssutils.parseStyle(u'''color: red;
                                        background-image:
                                            url(1.png),
                                            url('2.png')''')
        cssutils.replaceUrls(style, lambda url: 'prefix/' + url)
        self.assertEqual(
            style.cssText, u'''color: red;
background-image: url(prefix/1.png), url(prefix/2.png)''')
def getdata_hackerearth():
    """Scrape the upcoming challenges listed on HackerEarth.

    Fetches the challenge listing, then fetches each challenge's own page to
    read its start and end times.  A challenge is recorded only when
    ``filterdata`` accepts the ``#overview`` element of its page.

    Returns:
        A list of six parallel lists, in this order: names, links, image
        URLs, tags, start times, end times (times as returned by
        ``cleantime``).
    """
    url = "https://www.hackerearth.com/challenges/?filters=competitive%2Chackathon%2Chiring%2Cuniversity"

    source = requests.get(url).text
    soup = BeautifulSoup(source, 'lxml')

    opportunitiesList = soup.find(id="challenge-container").find(
        "div", class_="upcoming challenge-list").find_all(
            "div", class_="challenge-card-modern")

    opportunity_names = []
    opportunity_link = []
    opportunity_image = []
    opportunity_tags = []
    opportunity_starttime = []
    opportunity_endtime = []

    for opportunity in opportunitiesList:
        # `except Exception` (rather than a bare `except:`) keeps the
        # best-effort skipping of malformed cards while still letting
        # KeyboardInterrupt / SystemExit stop the scrape.
        try:
            name = opportunity.find(
                "span",
                class_="challenge-list-title challenge-card-wrapper").text
            link = opportunity.find("a").get("href")
            # The card image is only available as an inline background-image
            div_style = opportunity.find("div", class_="event-image")['style']
            style = cssutils.parseStyle(div_style)
            image = style['background-image'].replace('url(',
                                                      '').replace(')', '')
            tags = opportunity.find("div",
                                    class_="challenge-type").text.strip()
            # Start and end dates live on the challenge's own page.  Separate
            # names are used so the listing's `source`/`soup` are not
            # clobbered mid-loop.
            detail_source = requests.get(link).text
            detail_soup = BeautifulSoup(detail_source, 'lxml')
            try:
                starttime = detail_soup.find(
                    "div", class_="start-time-block").find_all("div")[1].text
                endtime = detail_soup.find(
                    "div", class_="end-time-block").find_all("div")[1].text
            except Exception:
                # Alternative page layout: both times are "timing-text"
                # spans inside a single "event-timings" block.
                timings = detail_soup.find(
                    "div", class_="event-timings").find_all(
                        "span", class_="timing-text")
                starttime = timings[0].text
                endtime = timings[1].text
            start = cleantime(starttime)
            end = cleantime(endtime)
            # adding data to the list
            if filterdata(detail_soup.find(id="overview")):
                opportunity_names.append(name)
                opportunity_link.append(link)
                opportunity_image.append(image)
                opportunity_tags.append(tags)
                opportunity_starttime.append(start)
                opportunity_endtime.append(end)
        except Exception:
            print("inconsistent panel")

    return [
        opportunity_names, opportunity_link, opportunity_image,
        opportunity_tags, opportunity_starttime, opportunity_endtime
    ]
Esempio n. 31
0
    def check_page(self, page_path):
        """
        Collect the image links referenced by a page.

        Two sources are scanned: the ``src`` of every ``<img>`` whose class
        is not ``no-check``, and the image referenced by an inline
        ``background-image`` / ``background`` style.

        :param page_path: path to page
        :return: list of image links found on the page
        """
        if self.verbose:
            print('Pulling links from: %s' % page_path)
        s_content = LinkChecker.parse_page(page_path)
        # check img tags
        img_links = s_content.find_all('img', class_=lambda x: x != 'no-check', src=True)
        img_links = [img['src'] for img in img_links]

        # check css backgrounds
        e_style = s_content.find_all(
            lambda tag: len(tag.attrs) and 'style' in tag.attrs and 'url' in tag['style'])
        for element in e_style:
            style = cssutils.parseStyle(element['style'])
            # Prefer the explicit background-image; fall back to the shorthand.
            bgi = style['background-image'] or style['background']
            if bgi and re.search(r'(png|jpg|jpe?g|gif)', style.cssText, flags=re.IGNORECASE):
                # Strip the url( ... ) wrapper and any quotes around the path.
                bgi = re.sub(r'(.*?\(\'*\"*)(.*?\.)(png|jpg|jpe?g|gif)(.*?\'*\"*\))',
                             r'\2\3', bgi, flags=re.I)
                # Drop whatever follows the file extension.  The extensions
                # are grouped so the alternation applies to the extension
                # only; ungrouped, a path such as "gifs/a.jpg" matched the
                # bare alternative "gif" and was truncated to "gif".
                bgi = re.sub(r'(.*?\.(?:png|gif|jpe?g))(.*)', r'\1', bgi, flags=re.I)
                img_links.append(bgi)
        return img_links
Esempio n. 32
0
def append_list(list_name, elem_url):
    """Append an element's background-image URL to one of ten module lists.

    ``list_name`` (0-9) selects which of ``pic_url_0`` ... ``pic_url_9``
    receives the value.  When ``elem_url`` is ``None`` an empty string is
    appended; any other ``list_name`` appends nothing.
    """
    if elem_url is None:
        image_url = ''
    else:
        # from image element extract url using cssutils
        declaration = cssutils.parseStyle(elem_url.get('style'))
        # cleaning the url IMPORTANT: This is getting url with 128 by 128 change it to 400 by 400
        image_url = declaration['background-image'].replace(
            'url(', '').replace(')', '')

    # append in corresponding list
    if list_name == 0:
        pic_url_0.append(image_url)
    elif list_name == 1:
        pic_url_1.append(image_url)
    elif list_name == 2:
        pic_url_2.append(image_url)
    elif list_name == 3:
        pic_url_3.append(image_url)
    elif list_name == 4:
        pic_url_4.append(image_url)
    elif list_name == 5:
        pic_url_5.append(image_url)
    elif list_name == 6:
        pic_url_6.append(image_url)
    elif list_name == 7:
        pic_url_7.append(image_url)
    elif list_name == 8:
        pic_url_8.append(image_url)
    elif list_name == 9:
        pic_url_9.append(image_url)
Esempio n. 33
0
    def check_page(self, page_path):
        """
        Collect the image links referenced by a page.

        Two sources are scanned: the ``src`` of every ``<img>`` whose class
        is not ``no-check``, and the image referenced by an inline
        ``background-image`` / ``background`` style.

        :param page_path: path to page
        :return: list of image links found on the page
        """
        if self.verbose:
            print('Pulling links from: %s' % page_path)
        s_content = LinkChecker.parse_page(page_path)
        # check img tags
        img_links = s_content.find_all('img',
                                       class_=lambda x: x != 'no-check',
                                       src=True)
        img_links = [img['src'] for img in img_links]

        # check css backgrounds
        e_style = s_content.find_all(lambda tag: len(tag.attrs) and 'style' in
                                     tag.attrs and 'url' in tag['style'])
        for element in e_style:
            style = cssutils.parseStyle(element['style'])
            # Prefer the explicit background-image; fall back to the shorthand.
            bgi = style['background-image'] or style['background']
            if bgi and re.search(r'(png|jpg|jpe?g|gif)',
                                 style.cssText,
                                 flags=re.IGNORECASE):
                # Strip the url( ... ) wrapper and any quotes around the path.
                bgi = re.sub(
                    r'(.*?\(\'*\"*)(.*?\.)(png|jpg|jpe?g|gif)(.*?\'*\"*\))',
                    r'\2\3',
                    bgi,
                    flags=re.I)
                # Drop whatever follows the file extension.  The extensions
                # are grouped so the alternation applies to the extension
                # only; ungrouped, a path such as "gifs/a.jpg" matched the
                # bare alternative "gif" and was truncated to "gif".
                bgi = re.sub(r'(.*?\.(?:png|gif|jpe?g))(.*)',
                             r'\1',
                             bgi,
                             flags=re.I)
                img_links.append(bgi)
        return img_links
Esempio n. 34
0
        def test_list_style_normalization(self):
            """Check that the ``list-style`` shorthand expands to longhands.

            Every case maps a raw shorthand value to the longhand values it
            sets explicitly; longhands not mentioned are expected to come
            from ``DEFAULTS``.
            """
            def ls_dict(expected):
                # Start from the defaults of all three longhands, then
                # overlay the values the shorthand sets.
                names = ['list-style-%s' % x
                         for x in ('type', 'image', 'position')]
                ans = dict((name, DEFAULTS[name]) for name in names)
                ans.update(
                    ('list-style-%s' % k, v) for k, v in expected.items())
                return ans

            cases = {
                'url(http://www.example.com/images/list.png)': {
                    'image': 'url(http://www.example.com/images/list.png)'
                },
                'inside square': {
                    'position': 'inside',
                    'type': 'square'
                },
                'upper-roman url(img) outside': {
                    'position': 'outside',
                    'type': 'upper-roman',
                    'image': 'url(img)'
                },
            }
            for raw, expected in cases.items():
                declaration = parseStyle('list-style: %s' % raw,
                                         validate=False)
                cval = tuple(declaration)[0].cssValue
                normalized = normalizers['list-style']('list-style', cval)
                self.assertDictEqual(ls_dict(expected), normalized)
def scrape_images():
    """Scrape the featured Mars image URL from the JPL space-images page.

    Reads the ``background-image`` declared in the inline style of the
    first ``<article>`` element of the page.

    Returns:
        ``{'image_url': <url>}`` where the URL is
        ``https://www.jpl.nasa.gov`` followed by the scraped path.
    """
    browser = init_browser()
    try:
        images_data = {}

        mars_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

        browser.visit(mars_url)
        # Give the page time to load before reading its HTML.
        time.sleep(5)

        images_html = browser.html
        images_soup = bs(images_html, 'html.parser')

        art_style = images_soup.find('article')['style']
        style = cssutils.parseStyle(art_style)
        img_url = style['background-image']
        # Strip the url( ... ) wrapper, leaving only the path.
        img_url = img_url.replace('url(', '').replace(')', '')
        featured_image_url = 'https://www.jpl.nasa.gov' + img_url

        images_data['image_url'] = featured_image_url

        return images_data
    finally:
        # Always close the browser, even when a scraping step raised;
        # otherwise every failed run leaves a browser process behind.
        browser.quit()
Esempio n. 36
0
 async def handle_content(self, content):
     """Parse HTML and, unless disabled, inject dork links into paragraphs.

     When ``self.no_dorks`` is not ``True``, every ``<p>`` without child
     elements is replaced by a new ``<p>`` in which each word at an index
     divisible by five becomes an ``<a>`` whose ``href`` is popped from
     ``self.dorks``; the other words are re-added as plain strings.

     Returns the document encoded as UTF-8.
     """
     soup = BeautifulSoup(content, 'html.parser')
     if self.no_dorks is not True:
         for paragraph in soup.find_all('p'):
             # Paragraphs that contain child elements are left untouched.
             if paragraph.findChildren():
                 continue
             css = None
             if 'style' in paragraph.attrs:
                 css = cssutils.parseStyle(paragraph.attrs['style'])
             words = paragraph.text.split()
             replacement = soup.new_tag(
                 'p', style=css.cssText if css else None)
             for position, word in enumerate(words):
                 # Fetch dorks if required
                 if len(self.dorks) <= 0:
                     self.dorks = await self.get_dorks()
                 padded = word + ' '
                 if position % 5 != 0:
                     replacement.append(soup.new_string(padded))
                     continue
                 href = self.dorks.pop()
                 # The link takes the paragraph's own colour when declared.
                 if css and 'color' in css.keys():
                     color = css.color
                 else:
                     color = '#000000'
                 link = soup.new_tag(
                     'a',
                     href=href,
                     style='color:{color};text-decoration:none;cursor:text;'.
                     format(color=color))
                 link.string = padded
                 replacement.append(link)
             paragraph.replace_with(replacement)
     content = soup.encode('utf-8')
     return content
Esempio n. 37
0
def fetch_telegram(url, page_name):
    """Fetch a Telegram web page and store its link posts.

    Each message bubble that has a text block containing at least one
    ``<a>`` is turned into an ``info`` dict.  It is inserted through
    ``Link.insert_from`` unless a ``Link`` with the same cleaned URL
    already exists.
    """
    print('Start fetch {}: {}'.format(page_name, url))
    session = requests.Session()
    headers = {
        "User-Agent":
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:71.0) Gecko/20100101 Firefox/71.0'
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    resp = session.get(url, headers=headers, verify=False)
    page = BeautifulSoup(resp.text, 'html.parser')
    bubbles = page.find_all("div", {"class": "tgme_widget_message_bubble"})
    for bubble in bubbles:
        info_blocks = bubble.find_all("div",
                                      {"class": "tgme_widget_message_info"})
        text_blocks = bubble.find_all("div",
                                      {"class": "tgme_widget_message_text"})

        # remove posts with only images
        if not text_blocks:
            continue
        content = BeautifulSoup(str(text_blocks[0]), 'html.parser')
        info_post = BeautifulSoup(str(info_blocks[0]), 'html.parser')
        media = bubble.find_all("i", {"class": "link_preview_image"})

        # Post announcements removed
        anchors = content.find_all('a')
        if not anchors:
            continue

        link_url = anchors[0].get('href')
        origin = info_post.find_all('a')[0].get('href')
        content = text_blocks[0].decode_contents()
        post_time = info_post.find_all('time')[0].get('datetime')

        url_image = ''
        if media:
            preview = BeautifulSoup(str(media[0]), 'html.parser').find('i')
            background = cssutils.parseStyle(
                preview['style'])['background-image']
            # Drop the first four characters and the last one.
            url_image = background[4:-1]

        info = {
            "category": LINK.CATEGORY_WEB,
            "content": content,
            "created_at": datetime.strptime(post_time[:-6],
                                            "%Y-%m-%dT%H:%M:%S"),
            "kind": LINK.KIND_LINK,
            "media": url_image,
            "origin": origin,
            "read": LINK.UNREAD,
            "status": LINK.STATUS_DONE,
            "title": page_name,
            "url": link_url
        }
        info['url'] = clean_up_url(info['url'])
        # Skip links that are already stored.
        if not Link.query.filter(Link.url == info['url']).first():
            Link.insert_from(info)
def scrape_info():
    """Scrape the featured Mars image URL from the JPL space-images page.

    Reads the ``background-image`` declared in the inline style of the
    first ``<article>`` inside the ``carousel_items`` div.

    Returns:
        A dict with the keys ``mars_img`` (``https://jpl.nasa.gov`` followed
        by the scraped path), ``min_temp`` and ``max_temp``.  The two
        temperatures are fixed values, not scraped.
    """
    browser = init_browser()
    try:
        url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url)

        # Give the page time to load before reading its HTML.
        time.sleep(1)

        # Scrape page into Soup
        html = browser.html
        soup = bs(html, "html.parser")

        carousel = soup.find('div', class_='carousel_items')
        div_style = carousel.find('article')['style']
        style = cssutils.parseStyle(div_style)
        partial_url = style['background-image']

        # Strip the url( ... ) wrapper, leaving only the path.
        partial_url = partial_url.replace('url(', '').replace(')', '')
        featured_image_url = "https://jpl.nasa.gov" + partial_url

        print("###############")
        print(featured_image_url)
        print("###############")

        mars_image = featured_image_url

        # Hard-coded values; nothing on the page is read for them.
        min_temp = 66
        max_temp = 99

        # Store data in a dictionary
        mars_data = {
            "mars_img": mars_image,
            "min_temp": min_temp,
            "max_temp": max_temp
        }

        # Return results
        return mars_data
    finally:
        # Always close the browser, even when a scraping step raised;
        # otherwise every failed run leaves a browser process behind.
        browser.quit()
    def get_image_src_from_style(self, style_content):
        """Return the bare URL of the ``background-image`` in an inline style.

        Quotes and the ``url(`` / ``)`` wrapper are removed from the value.
        """
        import cssutils

        declaration = cssutils.parseStyle(style_content)
        src = declaration['background-image']
        # Same removal order as before: quotes first, then the wrapper.
        for token in ("'", '"', 'url(', ')'):
            src = src.replace(token, '')
        return src
Esempio n. 40
0
def styled(element, styles):
    """Apply CSS declarations to a ``span`` and return that span.

    If ``element`` is not itself a ``span``, a new ``span`` child is created
    with ``sub_element`` and styled instead.  Declarations in ``styles``
    (property name -> value) are set on top of the span's existing inline
    style.
    """
    target = element if element.tag == 'span' else sub_element(element, 'span')
    declaration = parseStyle(target.attrib.get('style', ''))
    for prop_name, prop_value in styles.items():
        declaration.setProperty(prop_name, prop_value)
    target.attrib['style'] = declaration.getCssText(' ')
    return target
Esempio n. 41
0
def craft_image_url(image):
    """Extract an image URL from an element's inline style and open it.

    Returns:
        A ``(image_url, response)`` tuple, where ``response`` is the object
        returned by ``urllib.request.urlopen`` for that URL.
    """
    declaration = cssutils.parseStyle(image["style"])
    background = declaration["background-image"]
    unwrapped = background.replace("url(", "").replace(")", "")
    # Drops the first character and the last 15 characters.
    # NOTE(review): presumably a leading quote and a fixed-length suffix - confirm.
    image_url = unwrapped[1:-15]
    response = urllib.request.urlopen(image_url)
    return image_url, response
Esempio n. 42
0
        def test_border_normalization(self):
            """Check that border shorthands normalize to their longhands.

            Covers the per-edge shorthands (``border-<edge>``), the global
            ``border`` shorthand, and the per-property shorthands
            (``border-width`` / ``border-color`` / ``border-style``).
            Longhands a shorthand does not set are expected to come from
            ``DEFAULTS``.

            Dictionaries are iterated with ``.items()`` rather than the
            Python 2-only ``.iteritems()`` so the test also runs on Python 3.
            """
            def border_edge_dict(expected, edge="right"):
                # Defaults for one edge, overlaid with the expected values.
                ans = {
                    "border-%s-%s" % (edge, x): DEFAULTS["border-%s-%s" % (edge, x)]
                    for x in ("style", "width", "color")
                }
                for x, v in expected.items():
                    ans["border-%s-%s" % (edge, x)] = v
                return ans

            def border_dict(expected):
                # The same expectation repeated for every edge.
                ans = {}
                for edge in EDGES:
                    ans.update(border_edge_dict(expected, edge))
                return ans

            def border_val_dict(expected, val="color"):
                # One property (val) set to the same value on every edge.
                ans = {"border-%s-%s" % (edge, val): DEFAULTS["border-%s-%s" % (edge, val)] for edge in EDGES}
                for edge in EDGES:
                    ans["border-%s-%s" % (edge, val)] = expected
                return ans

            for raw, expected in {
                "solid 1px red": {"color": "red", "width": "1px", "style": "solid"},
                "1px": {"width": "1px"},
                "#aaa": {"color": "#aaa"},
                "2em groove": {"width": "2em", "style": "groove"},
            }.items():
                for edge in EDGES:
                    br = "border-%s" % edge
                    val = tuple(parseStyle("%s: %s" % (br, raw), validate=False))[0].cssValue
                    self.assertDictEqual(border_edge_dict(expected, edge), normalizers[br](br, val))

            for raw, expected in {
                "solid 1px red": {"color": "red", "width": "1px", "style": "solid"},
                "1px": {"width": "1px"},
                "#aaa": {"color": "#aaa"},
                "thin groove": {"width": "thin", "style": "groove"},
            }.items():
                val = tuple(parseStyle("%s: %s" % ("border", raw), validate=False))[0].cssValue
                self.assertDictEqual(border_dict(expected), normalizers["border"]("border", val))

            for name, val in {"width": "10%", "color": "rgb(0, 1, 1)", "style": "double"}.items():
                cval = tuple(parseStyle("border-%s: %s" % (name, val), validate=False))[0].cssValue
                self.assertDictEqual(border_val_dict(val, name), normalizers["border-" + name]("border-" + name, cval))
Esempio n. 43
0
    def strip_dom(self, tree):
        """Strip editor-only content from an SVG tree and refit its viewBox.

        Modifies ``tree`` in place:

        * layer groups whose inline style declares ``display: none`` are
          removed from the root;
        * the ``sodipodi:namedview`` and ``svg:metadata`` elements are
          removed when present;
        * the document is rendered to PNG with ``inkscape`` and measured
          with ``convert -trim``; the trimmed box, grown by 8 units on every
          side, becomes the new ``viewBox``, and the ``width``, ``height``
          and ``noisicaa`` origin attributes are rewritten from it.

        Requires the ``inkscape`` and ``convert`` executables.  Raises
        ``subprocess.CalledProcessError`` if either command fails.
        """
        root = tree.getroot()

        for grp in root.findall('svg:g', namespaces):
            if grp.get(_fqattr('inkscape', 'groupmode')) != 'layer':
                continue

            style = cssutils.parseStyle(grp.get('style', ''))
            # getPropertyValue() yields '' for an undeclared property,
            # whereas getProperty() returns None, so the former ".value"
            # access crashed on layers without a 'display' declaration.
            if style.getPropertyValue('display') == 'none':
                root.remove(grp)

        elem = root.find('sodipodi:namedview', namespaces)
        if elem is not None:  # pragma: no branch
            root.remove(elem)

        elem = root.find('svg:metadata', namespaces)
        if elem is not None:  # pragma: no branch
            root.remove(elem)

        with tempfile.TemporaryDirectory() as tmpdirname:
            svgpath = os.path.join(tmpdirname, 'svg')
            with open(svgpath, 'wb') as fp:
                tree.write(fp, encoding='utf-8', xml_declaration=True)

            # OMG.. there must be a better way...
            # OTOH, distributed archives should always contain the stripped
            # version, so this would only ever run on dev machines.
            pngpath = os.path.join(tmpdirname, 'png')
            subprocess.check_call(['inkscape', svgpath, '-e', pngpath],
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)

            # The info line is parsed as trimmed size, canvas size and the
            # offset of the trimmed box within the canvas.
            info = subprocess.check_output(
                ['convert', pngpath, '-trim', 'info:-'])
            info = info.decode('ascii')
            m = re.match(
                r'.* PNG (\d+)x(\d+) (\d+)x(\d+)\+(\d+)\+(\d+) .*$', info)
            assert m is not None, info
            width, height, canvas_width, canvas_height, xoffset, yoffset = [
                int(g) for g in m.groups()]
            # Grow the trimmed box by 8 units on every side.
            xoffset -= 8
            yoffset -= 8
            width += 16
            height += 16

            root.set(
                'viewBox', '%d %d %d %d' % (xoffset, yoffset, width, height))
            root.set(
                _fqattr('noisicaa', 'origin-x'),
                '%f' % ((canvas_width / 2 - xoffset) / 2))
            root.set(
                _fqattr('noisicaa', 'origin-y'),
                '%f' % ((canvas_height / 2 - yoffset) / 2))
            root.set('width', '%f' % (width / 2))
            root.set('height', '%f' % (height / 2))
Esempio n. 44
0
def get_note_esp_vert(soup):
	"""Derive the "Espaces verts" rating from the width of its progress bar.

	The rating is ``6 - width / 31``, where ``width`` is the leading integer
	of the CSS ``width`` of the second ``div`` following the label.  Returns
	``None`` (after printing the error) when anything goes wrong.
	"""
	# Sample of the markup being parsed:
	#<span style="float: left; width: 160px;">Espaces verts :</span>&nbsp;<div class="progress_bar"><div style="height: 19px;width: 124px; opacity: 0.5 ; background-color: #fff; float: right;"></div></div><br class="clear" />
	var = None
	try:
		label = soup.find("span", text=re.compile("Espaces verts"))
		bar_style = label.find_next("div").find_next("div")["style"]
		width_digits = re.match(r'\d+', parseStyle(bar_style).width).group()
		var = 6 - (int(width_digits) / 31)
	except Exception as e:
		print("no note_esp_vert", e)
	print("note_esp_vert", var)
	return var
Esempio n. 45
0
def make_text(text, width=-1, style=''):
    """
    Make and return a TextFragment built from a TextBox in an HTML document.

    ``style`` is extra CSS appended after a fixed monospace ``font-family``
    declaration.
    """
    base_css = (
        'font-family: Nimbus Mono L, Liberation Mono, FreeMono, Monospace; ')
    declaration = cssutils.parseStyle(base_css + style)
    cascaded = dict(effective_declarations(declaration))
    computed = computed_from_cascaded(None, cascaded, None)
    surface = cairo.SVGSurface(None, 1, 1)
    context = cairo.Context(surface)
    return TextFragment(text, computed, context, width)
Esempio n. 46
0
def dl_stickers(page):
    """Download every sticker referenced by a styled ``span`` on the page.

    For each ``span`` carrying a ``style`` attribute, the ``background-image``
    URL is extracted, opened with ``urllib.request.urlopen`` and handed to
    ``resize_sticker`` together with the URL.
    """
    # The original filter value `not ""` evaluates to True.
    styled_spans = page.find_all('span', attrs={"style": True})
    for span in styled_spans:
        declaration = cssutils.parseStyle(span['style'])
        background = declaration['background-image']
        unwrapped = background.replace('url(', '').replace(')', '')
        # Drops the first character and the last 15 characters.
        # NOTE(review): presumably a leading quote and a fixed-length suffix - confirm.
        sticker_url = unwrapped[1:-15]
        response = urllib.request.urlopen(sticker_url)
        resize_sticker(response, sticker_url)
Esempio n. 47
0
        def test_border_normalization(self):
            """Check that border shorthands normalize to their longhands.

            Covers the per-edge shorthands (``border-<edge>``), the global
            ``border`` shorthand, and the per-property shorthands
            (``border-width`` / ``border-color`` / ``border-style``).
            Longhands a shorthand does not set are expected to come from
            ``DEFAULTS``.

            Dictionaries are iterated with ``.items()`` rather than the
            Python 2-only ``.iteritems()`` so the test also runs on Python 3.
            """
            def border_edge_dict(expected, edge='right'):
                # Defaults for one edge, overlaid with the expected values.
                ans = {'border-%s-%s' % (edge, x): DEFAULTS['border-%s-%s' % (edge, x)] for x in ('style', 'width', 'color')}
                for x, v in expected.items():
                    ans['border-%s-%s' % (edge, x)] = v
                return ans

            def border_dict(expected):
                # The same expectation repeated for every edge.
                ans = {}
                for edge in EDGES:
                    ans.update(border_edge_dict(expected, edge))
                return ans

            def border_val_dict(expected, val='color'):
                # One property (val) set to the same value on every edge.
                ans = {'border-%s-%s' % (edge, val): DEFAULTS['border-%s-%s' % (edge, val)] for edge in EDGES}
                for edge in EDGES:
                    ans['border-%s-%s' % (edge, val)] = expected
                return ans

            for raw, expected in {
                'solid 1px red': {'color':'red', 'width':'1px', 'style':'solid'},
                '1px': {'width': '1px'}, '#aaa': {'color': '#aaa'},
                '2em groove': {'width':'2em', 'style':'groove'},
            }.items():
                for edge in EDGES:
                    br = 'border-%s' % edge
                    val = tuple(parseStyle('%s: %s' % (br, raw), validate=False))[0].cssValue
                    self.assertDictEqual(border_edge_dict(expected, edge), normalizers[br](br, val))

            for raw, expected in {
                'solid 1px red': {'color':'red', 'width':'1px', 'style':'solid'},
                '1px': {'width': '1px'}, '#aaa': {'color': '#aaa'},
                'thin groove': {'width':'thin', 'style':'groove'},
            }.items():
                val = tuple(parseStyle('%s: %s' % ('border', raw), validate=False))[0].cssValue
                self.assertDictEqual(border_dict(expected), normalizers['border']('border', val))

            for name, val in {
                'width': '10%', 'color': 'rgb(0, 1, 1)', 'style': 'double',
            }.items():
                cval = tuple(parseStyle('border-%s: %s' % (name, val), validate=False))[0].cssValue
                self.assertDictEqual(border_val_dict(val, name), normalizers['border-'+name]('border-'+name, cval))
Esempio n. 48
0
def _retag_as_div(soup, tag_name, extra_classes):
    """Rename every ``tag_name`` element to ``div`` and append ``extra_classes``."""
    # One pass over all matches replaces the former "find the first match,
    # rename it, search again from the top" loop.
    for tag in soup.findAll(tag_name):
        tag.name = 'div'
        tag['class'] = tag.get('class', []) + extra_classes


def toBootstrap(form):
    """Convert table-based form markup into Bootstrap-style div markup.

    Steps, in order:

    1. ``tbody`` and ``table`` become ``div.containers``, ``tr`` becomes
       ``div.row`` and ``td`` becomes ``div.col-xs-12.col-sm-12``.
    2. ``width``, ``height``, ``text-align`` and ``vertical-align`` are
       removed from the inline style of every ``div``.
    3. Empty ``p`` / ``div`` / ``span`` elements are removed repeatedly
       until none remain; each removed element is printed.
    4. The ``align``, ``valign``, ``width``, ``height``, ``max-width`` and
       ``max-height`` attributes are deleted from every tag.

    :param form: HTML markup accepted by ``BeautifulSoup``
    :return: the modified ``BeautifulSoup`` document
    """
    # NOTE(review): no parser is named here, so BeautifulSoup picks one
    # itself; naming one would make the result reproducible.
    soup = BeautifulSoup(form)

    # Change tbody and table tags to div.containers. This works for bootstrap
    # because .containers are stackable (but not container-fluid).
    _retag_as_div(soup, 'tbody', ['containers'])
    _retag_as_div(soup, 'table', ['containers'])
    # Change tr to div.row
    _retag_as_div(soup, 'tr', ['row'])
    # Change td to columns. The xs is for smaller screens. sm is for bigger.
    # These can be changed easily later to fit the layout needs.
    _retag_as_div(soup, 'td', ['col-xs-12', 'col-sm-12'])

    divs = soup.findAll('div') # Strip every width, height and text-align inline css style.
    for div in divs:
        css = cssutils.parseStyle(div.get('style', ''))
        del css['width']
        del css['height']
        del css['text-align']
        del css['vertical-align']
        div['style'] = css.cssText

    while True: # Recursively delete the empty p div span tags. On the websites they are often used to add space. This is not a common practice and won't work for most browsers.
        # Removing an element can leave its parent empty, so the search is
        # repeated from the top until nothing matches.
        empty = soup.find(lambda tag: tag.name in ['p','div','span'] and tag.find(True) is None and (tag.string is None or tag.string.strip()==""))
        if not empty:
            break
        print (empty)
        empty.extract()
    for tag in soup():
        del tag['align']
        del tag['valign']
        del tag['width']
        del tag['height']
        del tag['max-width']
        del tag['max-height']
    return soup
Esempio n. 49
0
 def test_border_condensation(self):
     """Check that border longhands condense into the widest shorthand.

     Three situations are covered: all four edges identical (a single
     ``border``), only the top edge declared (a single ``border-top``), and
     the top edge differing from the other three (four properties remain).
     """
     vals = 'red solid 5px'
     green_vals = vals.replace('red', 'green')

     def longhand_css(edges, values):
         # One "border-<edge>-<prop>: <value>" declaration per pair.
         return '; '.join(
             'border-%s-%s: %s' % (edge, prop, value)
             for edge in edges
             for prop, value in zip(BORDER_PROPS, values.split()))

     # All edges identical.
     style = parseStyle(longhand_css(EDGES, vals))
     condense_rule(style)
     for edge, prop in product(EDGES, BORDER_PROPS):
         self.assertFalse(style.getProperty('border-%s-%s' % (edge, prop)))
         self.assertFalse(style.getProperty('border-%s' % edge))
         self.assertFalse(style.getProperty('border-%s' % prop))
     self.assertEqual(style.getProperty('border').value, vals)

     # Only the top edge declared.
     top_css = longhand_css(('top',), vals)
     style = parseStyle(top_css)
     condense_rule(style)
     self.assertEqual(style.cssText, 'border-top: %s' % vals)

     # Top edge red, the remaining edges green.
     mixed_css = top_css + ';' + longhand_css(
         ('right', 'left', 'bottom'), green_vals)
     style = parseStyle(mixed_css)
     condense_rule(style)
     self.assertEqual(len(style.getProperties()), 4)
     self.assertEqual(style.getProperty('border-top').value, vals)
     self.assertEqual(style.getProperty('border-left').value, green_vals)
Esempio n. 50
0
 def test_edge_normalization(self):
     """Check that margin/padding shorthands expand to one value per edge.

     Each case maps a raw shorthand value to the four values expected for
     the edges, in ``EDGES`` order.

     The cases are iterated with ``.items()`` rather than the Python 2-only
     ``.iteritems()`` so the test also runs on Python 3.
     """
     def edge_dict(prefix, expected):
         # "<prefix>-<edge>" -> expected value, pairing EDGES with expected.
         return {'%s-%s' % (prefix, edge) : x for edge, x in zip(EDGES, expected)}
     for raw, expected in {
         '2px': ('2px', '2px', '2px', '2px'),
         '1em 2em': ('1em', '2em', '1em', '2em'),
         '1em 2em 3em': ('1em', '2em', '3em', '2em'),
         '1 2 3 4': ('1', '2', '3', '4'),
     }.items():
         for prefix in ('margin', 'padding'):
             cval = tuple(parseStyle('%s: %s' % (prefix, raw), validate=False))[0].cssValue
             self.assertDictEqual(edge_dict(prefix, expected), normalizers[prefix](prefix, cval))
    def test_parsevalidation(self):
        """Check that the ``validate`` flag controls validation logging.

        Parsing the invalid declaration ``color: 1`` must log something by
        default and with ``validate=True``, and nothing with
        ``validate=False`` - for both style sheets and bare declarations.
        """
        style = 'color: 1'
        sheet = 'a { %s }' % style

        cssutils.log.setLevel(logging.DEBUG)

        def logged(parse, text, **options):
            # Install a fresh log handler, parse, and return what was logged.
            stream = self._setHandler()
            parse(text, **options)
            return stream.getvalue()

        # sheet
        self.assertNotEqual(len(logged(cssutils.parseString, sheet)), 0)
        self.assertEqual(
            logged(cssutils.parseString, sheet, validate=False), '')

        # style
        self.assertNotEqual(len(logged(cssutils.parseStyle, style)), 0)
        self.assertNotEqual(
            len(logged(cssutils.parseStyle, style, validate=True)), 0)
        self.assertEqual(
            logged(cssutils.parseStyle, style, validate=False), '')
Esempio n. 52
0
 def test_list_style_normalization(self):
     """Check that the ``list-style`` shorthand expands to its longhands.

     Each case maps a raw shorthand value to the longhand values it sets
     explicitly; longhands not mentioned are expected to come from
     ``DEFAULTS``.

     Dictionaries are iterated with ``.items()`` rather than the Python
     2-only ``.iteritems()`` so the test also runs on Python 3.
     """
     def ls_dict(expected):
         # Defaults for all three longhands, overlaid with expected values.
         ans = {'list-style-%s' % x : DEFAULTS['list-style-%s' % x] for x in ('type', 'image', 'position')}
         for k, v in expected.items():
             ans['list-style-%s' % k] = v
         return ans
     for raw, expected in {
         'url(http://www.example.com/images/list.png)': {'image': 'url(http://www.example.com/images/list.png)'},
         'inside square': {'position':'inside', 'type':'square'},
         'upper-roman url(img) outside': {'position':'outside', 'type':'upper-roman', 'image':'url(img)'},
     }.items():
         cval = tuple(parseStyle('list-style: %s' % raw, validate=False))[0].cssValue
         self.assertDictEqual(ls_dict(expected), normalizers['list-style']('list-style', cval))
Esempio n. 53
0
        def test_edge_normalization(self):
            """Check that margin/padding shorthands expand to one value per edge.

            Each case maps a raw shorthand value to the four values expected
            for the edges, in ``EDGES`` order.

            The cases are iterated with ``.items()`` rather than the Python
            2-only ``.iteritems()`` so the test also runs on Python 3.
            """
            def edge_dict(prefix, expected):
                # "<prefix>-<edge>" -> expected value, pairing EDGES with expected.
                return {"%s-%s" % (prefix, edge): x for edge, x in zip(EDGES, expected)}

            for raw, expected in {
                "2px": ("2px", "2px", "2px", "2px"),
                "1em 2em": ("1em", "2em", "1em", "2em"),
                "1em 2em 3em": ("1em", "2em", "3em", "2em"),
                "1 2 3 4": ("1", "2", "3", "4"),
            }.items():
                for prefix in ("margin", "padding"):
                    cval = tuple(parseStyle("%s: %s" % (prefix, raw), validate=False))[0].cssValue
                    self.assertDictEqual(edge_dict(prefix, expected), normalizers[prefix](prefix, cval))
Esempio n. 54
0
	def iterstyles(node, rules):
		"""Yield all ``rules``, then the rule for ``node``'s style attribute.

		The style attribute is yielded as a ``(specificity, selector, style)``
		triple, and only when the node has a ``style`` attribute that is not
		"fancy".
		"""
		yield from rules
		if "style" not in node.attrs:
			return
		inline = node.attrs.style
		if inline.isfancy():
			return
		# According to CSS 2.1 (http://www.w3.org/TR/CSS21/cascade.html#specificity)
		# style attributes have the highest weight, so we yield it last
		# (CSS 3 uses the same weight)
		specificity = (1, 0, 0, 0)
		selector = xfind.IsSelector(node)
		# parse the style out of the style attribute
		declaration = cssutils.parseStyle(str(inline))
		yield (specificity, selector, declaration)
Esempio n. 55
0
 def test_children(self):
     "CSSStyleDeclaration.children()"
     style = u'/*1*/color: red; color: green; @x;'
     # Expected type of each child, in document order.
     expected_types = [
         cssutils.css.CSSComment,
         cssutils.css.Property,
         cssutils.css.Property,
         cssutils.css.CSSUnknownRule
     ]

     def check(declaration):
         # Every child has the expected type and points back at its parent.
         for index, child in enumerate(declaration.children()):
             self.assertEqual(expected_types[index], type(child))
             self.assertEqual(child.parent, declaration)

     # The same declarations parsed standalone, inside a style rule, and
     # inside a style rule nested in a media rule.
     check(cssutils.parseStyle(style))
     sheet = cssutils.parseString(u'a {'+style+'}')
     check(sheet.cssRules[0].style)
     media_sheet = cssutils.parseString(u'@media all {a {'+style+'}}')
     check(media_sheet.cssRules[0].cssRules[0].style)

     # Properties added later get the declaration as their parent too.
     declaration = cssutils.parseStyle(style)
     declaration['x'] = '0'
     self.assertEqual(declaration, declaration.getProperty('x').parent)
     declaration.setProperty('y', '1')
     self.assertEqual(declaration, declaration.getProperty('y').parent)
Esempio n. 56
0
        def test_list_style_normalization(self):
            """Check that the ``list-style`` shorthand expands to its longhands.

            Each case maps a raw shorthand value to the longhand values it
            sets explicitly; longhands not mentioned are expected to come
            from ``DEFAULTS``.

            Dictionaries are iterated with ``.items()`` rather than the
            Python 2-only ``.iteritems()`` so the test also runs on Python 3.
            """
            def ls_dict(expected):
                # Defaults for all three longhands, overlaid with expected values.
                ans = {"list-style-%s" % x: DEFAULTS["list-style-%s" % x] for x in ("type", "image", "position")}
                for k, v in expected.items():
                    ans["list-style-%s" % k] = v
                return ans

            for raw, expected in {
                "url(http://www.example.com/images/list.png)": {"image": "url(http://www.example.com/images/list.png)"},
                "inside square": {"position": "inside", "type": "square"},
                "upper-roman url(img) outside": {"position": "outside", "type": "upper-roman", "image": "url(img)"},
            }.items():
                cval = tuple(parseStyle("list-style: %s" % raw, validate=False))[0].cssValue
                self.assertDictEqual(ls_dict(expected), normalizers["list-style"]("list-style", cval))
Esempio n. 57
0
def csstext_to_pairs(csstext):
    """
    Parse ``csstext`` as a CSS declaration block and return a list of
    ``(name, value)`` tuples, one per property, sorted by property name.

    Names are stripped of surrounding whitespace; values are produced by
    ``format_value``.
    """
    # The lock is required to avoid ``cssutils`` concurrency
    # issues documented in issue #65
    with csstext_to_pairs._lock:
        pairs = []
        for declaration in cssutils.parseStyle(csstext):
            pairs.append((declaration.name.strip(), format_value(declaration)))
        # Order by name only; the sort is stable, so properties sharing a
        # name keep their source order.
        pairs.sort(key=itemgetter(0))
        return pairs
Esempio n. 58
0
 def _filterStyles(self, article):
     """Remove invalid CSS properties from inline ``style`` attributes.

     Every element of ``article.tree`` that has a ``style`` attribute is
     parsed with cssutils validation enabled. Properties reported as not
     valid are removed, and the attribute is rewritten (on a single line)
     only if something was removed. If parsing raises ``ValueError`` the
     whole ``style`` attribute is deleted from the element.
     """
     for node in article.tree.xpath('//*[@style]'):
         try:
             styles = cssutils.parseStyle(node.get('style'), validate=True)
         except ValueError:
             # the node style is broken and cssutils crashes - we remove the style
             del node.attrib['style']
             # Nothing was parsed for this node: move on. Without this,
             # ``styles`` is either unbound (first node) or still the
             # declaration of a previously processed node.
             continue
         # Collect the names first so the declaration is not modified while
         # its children are being iterated. Children without a ``valid``
         # attribute are left alone.
         invalid_names = [
             child.name for child in styles.children()
             if hasattr(child, 'valid') and not child.valid
         ]
         for name in invalid_names:
             styles.removeProperty(name)
         if invalid_names:
             node.set('style', styles.getCssText().replace('\n', ''))
Esempio n. 59
0
 def _apply_style_attr(self, url_replacer=None):
     """Merge the element's inline ``style`` attribute into ``self._style``.

     The attribute is split on ``;``; empty entries and entries matching
     ``self.MS_PAT`` are discarded before the remainder is parsed. Nothing
     happens if the element has no ``style`` attribute or the text fails to
     parse with ``CSSSyntaxError``.

     :param url_replacer: optional callable passed to ``replaceUrls`` to
         rewrite URLs in the parsed declaration before it is flattened.
     """
     attrib = self._element.attrib
     if 'style' not in attrib:
         return
     kept = []
     for declaration in attrib['style'].split(';'):
         declaration = declaration.strip()
         # Drop blanks and anything MS_PAT recognises.
         if declaration and self.MS_PAT.match(declaration) is None:
             kept.append(declaration)
     css = '; '.join(kept)
     try:
         style = parseStyle(css, validate=False)
     except CSSSyntaxError:
         return
     if url_replacer is not None:
         replaceUrls(style, url_replacer, ignoreImportRules=True)
     self._style.update(self._stylizer.flatten_style(style))
def _first_css_value(style, name):
    """Return the first component value of CSS property ``name``, or None.

    None is returned when the declaration has no such property
    (``getProperty`` gives None in that case) and when the property's
    value list is empty.
    """
    prop = style.getProperty(name)
    if prop is None:
        return None
    values = prop.propertyValue
    if values.length == 0:
        return None
    return values[0].value


def parse_tag(tag, update_time):
    """Build a ``Car`` from one tag, or return None if the tag is unusable.

    :param tag: element exposing ``has_attr(name)`` and ``tag[name]``
        (presumably a BeautifulSoup tag -- confirm against the caller). Its
        ``src``, ``style`` and ``title`` attributes are read.
    :param update_time: stored unchanged as the car's ``update_time``.
    :returns: ``Car`` built from the collected fields, or None after
        logging an error when an attribute is missing, the ``top``/``left``
        CSS values are absent, the position is unknown, or the title is not
        a destination.
    """
    obj = {}

    # Car number: the file name of "src" without its extension.
    if not tag.has_attr("src"):
        logging.error('no "src" attribute: %s', tag)
        return None
    car_id, _ = os.path.splitext(os.path.basename(tag["src"]))
    obj["car_id"] = car_id

    # Position: looked up from the "top"/"left" values of the inline style.
    if not tag.has_attr("style"):
        logging.error('no "style" attribute: %s', tag)
        return None
    style = parseStyle(tag["style"])
    # Compare against None explicitly: 0 is a legitimate coordinate.
    top = _first_css_value(style, "top")
    if top is None:
        logging.error('no css style "top": %s', tag)
        return None
    left = _first_css_value(style, "left")
    if left is None:
        logging.error('no css style "left": %s', tag)
        return None
    pos = resource.get_position(top, left)
    if pos is None:
        # %s rather than %d: the values are not guaranteed to be integers,
        # and a failed format would swallow this error message.
        logging.error("undefined position: <top=%s, left=%s>", top, left)
        return None
    obj["status"] = pos[0]
    obj["direction"] = pos[1]
    obj["current_stop"] = pos[2]
    if pos[3] is not None:
        obj["next_stop"] = pos[3]

    # Destination: taken from the title after stripping its fixed
    # prefix and suffix.
    if not tag.has_attr("title"):
        logging.error('no "title" attribute: %s', tag)
        return None
    if not tag["title"].startswith(u"行先:"):
        logging.error('"title" attribute is not destination: %s', tag)
        return None
    dest = tag["title"].replace(u"行先:", "").replace(u"行き", "")
    obj["destination"] = resource.find_stop_code(dest)

    # Update timestamp.
    obj["update_time"] = update_time

    return Car(**obj)