Beispiel #1
    def getMetaContent(self, doc, metaName):
        """Extract the content of a given meta element from a document.

        Args:
            doc: Object exposing ``cssselect(selector)``, which returns the
                sequence of elements matching the selector.
            metaName: CSS selector identifying the meta element to read.

        Returns:
            The ``content`` attribute of the first matching element,
            stripped and passed through ``Parser.clearText``; an empty
            string when nothing matches or the first match has no
            ``content`` attribute.
        """
        meta = doc.cssselect(metaName)

        # No match at all (None or an empty result): nothing to extract.
        if meta is None or len(meta) == 0:
            return ''

        # Only the first matching element is consulted.
        content = meta[0].attrib.get('content')
        if content is None:
            return ''

        return Parser.clearText(content.strip())
Beispiel #2
    def extractTags(self, article):
        """Collect the distinct tag strings linked from an article document.

        Elements matching ``A_REL_TAG_SELECTOR`` are preferred; when none
        match, ``A_HREF_TAG_SELECTOR`` is used as a fallback.

        Args:
            article: Object whose ``doc`` attribute is the parsed document
                node (it must support ``len()`` and ``cssselect``).

        Returns:
            ``NO_STRINGS`` when the node has no children or neither selector
            matches; otherwise a ``set`` of the non-empty cleaned texts of
            the matching elements.
        """
        node = article.doc

        # The node doesn't have children. Use an explicit length check
        # rather than truthiness so the intent is unambiguous.
        if len(node) == 0:
            return NO_STRINGS

        elements = node.cssselect(A_REL_TAG_SELECTOR)
        if not elements:
            elements = node.cssselect(A_HREF_TAG_SELECTOR)
            if not elements:
                return NO_STRINGS

        tags = set()
        for el in elements:
            tag = Parser.clearText(Parser.getText(el).strip())
            # Skip elements whose text is empty after cleaning.
            if tag:
                tags.add(tag)

        return tags