Ejemplo n.º 1
0
class Article(Entity):
    """Article entity whose fields are selected with relative XPath expressions.

    Fields declared with ``all=True`` presumably collect every match instead
    of a single one -- confirm against the field implementation.
    """

    title = StringField('./h1/text()', xpath=True)
    content = StringField(
        './p/text()',
        xpath=True,
        all=True,
    )
    # Nested Author entities, selected from the <div class="author"> nodes.
    authors = EntityField(
        Author,
        './div[@class="author"]',
        xpath=True,
        all=True,
    )
Ejemplo n.º 2
0
class BookChapter(Entity):
    """An entity with various different field types.

    Each field takes a selector string; some additionally pass a regular
    expression through ``re`` (presumably applied to the selected text to
    pull out the value -- confirm against the field implementation).
    """
    book = StringField('body>h1::text')
    number = IntField('.chnum::text')
    # Raw string: '\$' is not a valid string escape, so a plain literal
    # triggers an invalid-escape warning on modern Python. The pattern
    # value itself is unchanged.
    price = FloatField('.cost::text', re=r'\$(.+)')
    public = BoolField('p::text', re=r'Public: (.+)')
    # NOTE(review): the trailing '.' is an unescaped regex wildcard, not a
    # literal period; presumably a literal '.' was intended -- confirm
    # before changing, since escaping it could alter what is captured.
    updated = DateTimeField('p::text', re=r'Last updated on (.+).')
    next_url = StringField('#next::attr("href")', lower=True)
Ejemplo n.º 3
0
class SimpleContent(Entity):
    """Simple entity whose fields are all declared with XPath selectors."""

    title = StringField(
        '//div/h1/text()',
        xpath=True,
    )
    link_text = StringField(
        '//div/a/text()',
        xpath=True,
    )
    link_url = StringField(
        '//div/a/@href',
        xpath=True,
    )
    # Absolute path variant of the link selector, declared with all=True
    # and lower=True.
    urls = StringField(
        '/html/body/div/a/@href',
        xpath=True,
        all=True,
        lower=True,
    )
    in_divs = StringField(
        '//div/div',
        xpath=True,
        all=True,
    )

    # Chain of two callables: str.capitalize, then RAdd('!').
    # Presumably run on the extracted ``title`` value -- confirm how
    # ``process_<field>`` attributes are consumed by Entity.
    process_title = Chain(
        six.text_type.capitalize,
        RAdd('!'),
    )
Ejemplo n.º 4
0
class ArticleC(Entity):
    """Article entity declared with CSS-style selectors (no ``xpath`` flag).

    Fields declared with ``all=True`` presumably collect every match instead
    of a single one -- confirm against the field implementation.
    """

    title = StringField('h1::text')
    content = StringField(
        'p::text',
        all=True,
    )
    # Nested AuthorC entities, selected from the ``div.author`` nodes.
    authors = EntityField(
        AuthorC,
        'div.author',
        all=True,
    )
Ejemplo n.º 5
0
class AuthorC(Entity):
    """Author entity with first and last name, declared with CSS-style selectors."""

    firstname = StringField(
        'span.firstname::text',
    )
    lastname = StringField(
        'span.lastname::text',
    )
Ejemplo n.º 6
0
class Author(Entity):
    """Author entity with first and last name, declared with relative XPath selectors."""

    firstname = StringField(
        './span[@class="firstname"]/text()',
        xpath=True,
    )
    lastname = StringField(
        './span[@class="lastname"]/text()',
        xpath=True,
    )
Ejemplo n.º 7
0
class Blog(Entity):
    """Blog entity with a title and content, declared with relative XPath selectors.

    ``content`` is declared with ``all=True``, which presumably collects every
    matching ``<p>`` text node -- confirm against the field implementation.
    """

    title = StringField(
        './h1/text()',
        xpath=True,
    )
    content = StringField(
        './p/text()',
        xpath=True,
        all=True,
    )