Example #1
0
class NestedOuterPage(PageModel):
    """Page model that nests ``NestedInnerPage`` inside a ``div.outer`` node.

    Two fields are declared under the ``StrictNode("div.outer")`` root:
    ``outertxt`` (``Text()`` on the ``> span`` selector) and ``nested``
    (a ``NestedInnerPage()`` instance on the ``div.inner`` selector).
    """

    model_class = dict
    page_tree = Html(
        StrictNode("div.outer")(
            Node("> span")(
                outertxt=Text(),
            ),
            Node("div.inner")(
                nested=NestedInnerPage(),
            ),
        ),
    )
Example #2
0
class AttrPage(PageModel):
    """Page model reading two attributes and the text of ``a.mylink``.

    Fields declared on the ``a.mylink`` node: ``href`` (``Attr("href")``),
    ``title`` (``Attr("title")``) and ``text`` (``Text()``).
    """

    model_class = dict
    page_tree = Html(Node("a.mylink")(
        href=Attr("href"),
        title=Attr("title"),
        text=Text(),
    ))
Example #3
0
class PostprocPage(PageModel):
    """Page model with a ``postproc`` hook that upper-cases its only field.

    The tree declares a single ``lower`` field (``Text()`` on ``div.lower``).
    """

    model_class = dict
    page_tree = Html(
        Node("div.lower")(
            lower=Text(),
        ),
    )

    @classmethod
    def postproc(cls, dic):
        """Move ``dic['lower']`` to ``dic['upper']``, upper-cased.

        The ``'lower'`` key is removed from ``dic``; when it is absent an
        empty string is used, so ``'upper'`` is always set.  The same
        mapping object is returned.
        """
        lowered = dic.pop('lower', '')
        dic['upper'] = lowered.upper()
        return dic
Example #4
0
class SimplePage(PageModel):
    """Page model declaring three text fields under the ``body`` node.

    ``div1`` is ``Text()`` on ``div.div_1``, ``div2`` is ``Text()`` on a node
    built from the two selectors ``#div_2`` and ``#asdf``, and ``body`` is
    ``Text()`` on the ``body`` node itself.  A bare ``Node("span")`` with no
    fields is also part of the tree.
    """

    model_class = dict
    page_tree = Html(Node("body")(
        Node("div.div_1")(
            div1=Text(),
        ),
        Node("#div_2", "#asdf")(
            div2=Text(),
        ),
        Node("span"),
        body=Text(),
    ))
Example #5
0
class PhrasalVerbLink(PageModel):
    """Link model for a phrasal-verb entry, read from an ``a`` element.

    ``url`` and ``key`` come from the ``href`` and ``title`` attributes;
    ``link_type`` and ``part_of_speech`` are fixed ``Constant`` values.
    """

    model_class = models.Link

    page_tree = Html(Node("a")(
        url=Attr("href"),
        key=Attr("title"),
        link_type=Constant("phrasal verbs"),
        part_of_speech=Constant("phrasal verb"),
    ))
Example #6
0
class Entry(PageModel):
    """Page model for a dictionary entry, targeting ``models.Entry``.

    The tree declares the headword text, optional header fields (style
    level, pronunciation, part of speech), an optional intro paragraph, a
    list of ``Sense`` sub-models and three optional link lists.  The link
    lists are merged into a single ``links`` value by ``postproc``.
    """

    model_class = models.Entry

    page_tree = Html(
        Node("div#headword div#headwordleft span.BASE")(original_key=Text()),
        Node("div#headbar")(
            Node.optional("span.STYLE-LEVEL")(style_level=Text()),
            Node.optional("span.PRON")(pron=Text()),
            Node.optional("span.PART-OF-SPEECH")(part_of_speech=Text()),
        ),
        Node.optional("div.SUMMARY div.p")(intro_paragraph=Text()),
        Node.list("div.SENSE-BODY")(senses=Sense()),
        Node.optional("div#phrases_container > ul")(
            Node.list("li")(phrs=PhraseLink()),
        ),
        Node.optional("div#phrasal_verbs_container > ul")(
            Node.list("li")(phrvbs=PhrasalVerbLink()),
        ),
        Node.optional("div.entrylist > ul")(
            Node.list("li")(relwrds=RelatedWordLink()),
        ),
    )

    @classmethod
    def postproc(cls, dic):
        """Fold the three link lists into ``dic['links']``.

        ``'relwrds'``, ``'phrvbs'`` and ``'phrs'`` are removed from ``dic``
        (each defaulting to an empty list when absent) and concatenated in
        that order.  The same mapping object is returned.
        """
        # Start from the related-word links and extend that value in place,
        # exactly as an augmented assignment on dic['links'] would.
        links = dic.pop('relwrds', [])
        links += dic.pop('phrvbs', []) + dic.pop('phrs', [])
        dic['links'] = links
        return dic
Example #7
0
class RelatedWordLink(PageModel):
    """Link model for a "related words" entry, read from an ``a`` element.

    ``key`` and ``url`` come from the ``title`` and ``href`` attributes,
    ``link_type`` is the constant ``"related words"``, and
    ``part_of_speech`` is the text of an optional ``span.PART-OF-SPEECH``
    child.  ``postproc`` then trims the part-of-speech text off the key.
    """

    model_class = models.Link

    page_tree = Html(
        Node("a")(
            Node.optional("span.PART-OF-SPEECH")(
                part_of_speech=Text()
            ),
            key=Attr("title"),
            url=Attr("href"),
            link_type=Constant("related words"),
        )
    )

    @classmethod
    def postproc(cls, dic):
        """Strip the trailing part-of-speech text from ``dic["key"]``.

        Args:
            dic: Mapping of extracted fields.  Must contain ``"key"``; may
                contain ``"part_of_speech"``.

        Returns:
            The same ``dic`` object, with ``"key"`` shortened by the length
            of the part-of-speech text (if any) and whitespace-stripped.

        Raises:
            KeyError: If ``"key"`` is missing from ``dic``.
        """
        key = dic["key"]
        # Treat a missing or None part of speech as "nothing to trim".
        pos = dic.get("part_of_speech") or ""
        # Guard the slice: with an empty suffix, key[:-0] is key[:0] == "",
        # which would wipe the key whenever the optional span is absent.
        if pos:
            key = key[:-len(pos)]
        dic["key"] = key.strip()
        # Return the mapping, matching the other postproc hooks in this file.
        return dic
Example #8
0
class NestedInnerPage(PageModel):
    """Page model with a single ``innertxt`` field: ``Text()`` on ``span``."""

    model_class = dict
    page_tree = Html(
        Node("span")(
            innertxt=Text(),
        ),
    )
Example #9
0
class ThisClassElem(PageModel):
    """Page model with a ``head`` text and an optional ``tail`` sub-model.

    ``head`` is ``Text()`` on ``> div.head``; ``tail`` is ``ThisClass()`` on
    the optional ``> div.tail`` node.
    NOTE(review): ``ThisClass`` presumably refers back to this same model
    (a recursive structure) -- confirm against the library.
    """

    model_class = dict
    page_tree = Html(
        Node("> div.head")(
            head=Text(),
        ),
        Node.optional("> div.tail")(
            tail=ThisClass(),
        ),
    )
Example #10
0
class OptionalNodePage(PageModel):
    """Page model whose only field ``x`` sits on an optional ``div.missing``."""

    model_class = dict
    page_tree = Html(
        Node.optional("div.missing")(
            x=Text(),
        ),
    )
Example #11
0
class StrictPage(PageModel):
    """Page model with a ``StrictNode`` root holding one field-less ``span``.

    No fields are declared; the tree is ``div.strict`` containing ``span``.
    """

    model_class = dict
    page_tree = Html(
        StrictNode("div.strict")(
            Node("span"),
        ),
    )
Example #12
0
class MissingNodePage(PageModel):
    """Page model whose only field ``x`` sits on a plain ``div.missing`` node.

    Unlike ``Node.optional``, the node here is built with ``Node`` directly.
    """

    model_class = dict
    page_tree = Html(
        Node("div.missing")(
            x=Text(),
        ),
    )
Example #13
0
class ThisClassPage(PageModel):
    """Page model exposing a ``ThisClassElem`` sub-model as field ``li``.

    The sub-model is attached to the ``div.list`` node.
    """

    model_class = dict
    page_tree = Html(
        Node("div.list")(
            li=ThisClassElem(),
        ),
    )
Example #14
0
 class InvalidPageTwo(PageModel):
     """Page model that passes ``Text()`` positionally to a ``div`` node.

     NOTE(review): the class name suggests this definition is deliberately
     invalid, since ``Text()`` is not bound to a field name -- confirm
     against the code that uses it.
     """

     model_class = dict
     page_tree = Html(
         Node("div")(
             Text(),
         ),
     )
Example #15
0
class ConcatPage(PageModel):
    """Page model with one ``concatenated`` field built from a node list.

    The field is ``Text()`` on ``Node.list("span.elem").concat(", ")``,
    nested under ``div.list``.
    """

    model_class = dict
    page_tree = Html(
        Node("div.list")(
            Node.list("span.elem").concat(", ")(
                concatenated=Text(),
            ),
        ),
    )
Example #16
0
class ConstantPage(PageModel):
    """Page model whose ``const`` field is ``Constant("myconstant")``.

    The field is declared on a ``div.doesnotmatter`` node.
    """

    model_class = dict
    page_tree = Html(
        Node("div.doesnotmatter")(
            const=Constant("myconstant"),
        ),
    )
Example #17
0
class TakefirstPage(PageModel):
    """Page model with one ``firstelem`` field taken from a node list.

    The field is ``Text()`` on ``Node.list("div.listelem").take_first()``.
    """

    model_class = dict
    page_tree = Html(
        Node.list("div.listelem").take_first()(
            firstelem=Text(),
        ),
    )
Example #18
0
 class InvalidPage(PageModel):
     """Page model that binds the field name ``dupfield`` on two nodes.

     NOTE(review): the class name suggests the duplicated field name is a
     deliberate error case -- confirm against the code that uses it.
     """

     model_class = dict
     page_tree = Html(
         Node("div.one")(
             dupfield=Text(),
         ),
         Node("div.two")(
             dupfield=Text(),
         ),
     )
Example #19
0
class ListPage(PageModel):
    """Page model with one ``mylistfield`` field on a node list.

    The field is ``Text()`` on ``Node.list(".listelem")``, nested under the
    ``.list`` node.
    """

    model_class = dict
    page_tree = Html(
        Node(".list")(
            Node.list(".listelem")(
                mylistfield=Text(),
            ),
        ),
    )