Exemple #1
0
class Page(Item):
    """Pagination values read from the pager links of a listing page.

    Both fields are taken from the ``href`` attribute of an anchor inside
    ``div.mt20 > div.pages``; the ``clean_*`` hooks convert the last
    ``/``-separated segment of that href to an ``int``.
    """

    # href of the ``a.anext`` anchor in the pager.
    current = Attr('div.mt20 > div.pages > a.anext', 'href')
    # href of the ``a.alast`` anchor in the pager.
    total = Attr('div.mt20 > div.pages > a.alast', 'href')

    def clean_total(self, total):
        """Return the last ``/``-separated segment of ``total`` as an int.

        Raises:
            IndexError: if ``total`` contains no ``/``.
            ValueError: if the last segment is not an integer literal.
        """
        return int(total.rsplit('/', 1)[1])

    def clean_current(self, current):
        """Return the last ``/``-separated segment of ``current`` minus 20.

        NOTE(review): the field is read from the ``a.anext`` link, so the
        subtraction presumably steps back from the next page's offset to
        the current one with a page size of 20 -- confirm against the site.

        Raises:
            IndexError: if ``current`` contains no ``/``.
            ValueError: if the last segment is not an integer literal.
        """
        # The stray debug ``print(current)`` was dropped: a clean hook
        # should not write to stdout every time a page is parsed.
        return int(current.rsplit('/', 1)[1]) - 20
Exemple #2
0
class Repic(Item):
    """Fields selected from an ``a.cook-img`` anchor and its ``div.cook-info``."""

    # Raw inline ``style`` attribute of the anchor; reduced by ``clean_img``.
    img = Attr("a.cook-img", "style")
    # ``href`` attribute of the same anchor.
    url = Attr("a.cook-img", "href")
    title = Text("div.cook-info > a.cookname")
    major = Text("div.cook-info > p.major")

    def clean_img(self, img):
        """Return what sits between ``url(`` and ``)`` in the style string.

        The pattern is anchored at the start of ``img`` and spells out the
        whole expected declaration list, so any other style layout yields
        no match and the attribute access on ``None`` raises
        ``AttributeError``.
        """
        style_pattern = 'background: url[(](.*)[)] no-repeat center center;background-size: cover;position: relative;'
        found = re.match(style_pattern, img)
        return found.group(1)
class Page(Item):
    """Holds the ``href`` of the ``.morelink`` element as ``next_page``."""

    next_page = Attr(".morelink", "href")

    def clean_next_page(self, value):
        """Pass the scraped href through ``api.convert_string``.

        The href is prefixed with ``/`` and handed over together with the
        ``/news?p={page}`` template and a ``/posts?page={page}`` template
        built on ``request.host_url`` (stripped of surrounding slashes).
        NOTE(review): presumably this rewrites the upstream link into this
        service's own URL scheme -- confirm against ``api.convert_string``.
        """
        upstream_path = "/" + value
        local_template = request.host_url.strip("/") + "/posts?page={page}"
        return api.convert_string(upstream_path, "/news?p={page}", local_template)
Exemple #4
0
class Cookbook(Item):
    """Declarative field selectors for a single recipe page."""

    title = Text("h2.title")
    # ``src`` of the image nested inside ``.recipe-content``.
    img = Attr(".recipe-content > div > div > div > a > img", "src")
    # Both counters are read from spans inside ``div.vcnum``.
    browse_count = Text("div.vcnum > span")
    collect_count = Text("div.vcnum > span.collectnum")
    intro = Text("p.intro")
    tip = Text("div.tips > p")
Exemple #5
0
class Page(Item):
    """Holds the ``href`` of the ``.morelink`` element as ``next_page``."""

    next_page = Attr('.morelink', 'href')

    def clean_next_page(self, value):
        """Pass the scraped href through ``api.convert_string``.

        The href is prefixed with ``/`` and handed over together with the
        ``/news?p={page}`` template and a ``/posts?page={page}`` template
        built on ``request.host_url`` (stripped of surrounding slashes).
        NOTE(review): presumably this rewrites the upstream link into this
        service's own URL scheme -- confirm against ``api.convert_string``.
        """
        upstream_path = '/' + value
        local_template = request.host_url.strip('/') + '/posts?page={page}'
        return api.convert_string(upstream_path, '/news?p={page}', local_template)
Exemple #6
0
import requests
from htmlparsing import Element, HTMLParsing, Text, Attr, Parse

# Download one page and pull a single record out of it with htmlparsing.
url = 'http://localhost:8082/home/serveList.html'
# Without a timeout, requests waits indefinitely on an unresponsive server;
# bound the wait so the script fails fast instead of hanging.
r = requests.get(url, timeout=10)
# Each key maps to the selector that produces its value.
article_detail = HTMLParsing(r.text).detail({
    'title': Text('a.storylink'),
    'points': Parse('span.score', '>{} points'),
    'link': Attr('a.storylink', 'href'),
})
print(article_detail)
Exemple #7
0
class Post(Item):
    """Declarative field selectors for a post entry."""

    # ``href`` attribute of the ``.read-more`` element.
    url = Attr(".read-more", "href")
    title = Text("h1 > a")
Exemple #8
0
 class Post(Item):
     """Declarative field selectors for a ``.storylink`` entry."""

     # NOTE(review): 'no this attribute' is not a real attribute name;
     # presumably this exercises the missing-attribute path -- confirm.
     url = Attr(".storylink", "no this attribute")
     title = Text(".storylink")
Exemple #9
0
 class Post(Item):
     """Declarative field selectors for a ``.storylink`` entry."""

     # Link target and visible text both come from the same element.
     url = Attr(".storylink", "href")
     title = Text(".storylink")
Exemple #10
0
class Post(Item):
    """Declarative field selectors for a ``.storylink`` entry."""

    # Link target and visible text both come from the same element.
    url = Attr('.storylink', 'href')
    title = Text('.storylink')
Exemple #11
0
 class Post(Item):
     """Declarative field selectors for a ``.storylink`` entry."""

     # NOTE(review): 'no this attribute' is not a real attribute name;
     # presumably this exercises the missing-attribute path -- confirm.
     url = Attr('.storylink', 'no this attribute')
     title = Text('.storylink')
Exemple #12
0
class Course(Item):
    """Declarative field selectors for a course entry."""

    # ``href`` attribute of the ``a`` element.
    url = Attr("a", "href")
    title = Text("h4")
Exemple #13
0
class Step(Item):
    """Declarative field selectors for a single step (image plus text)."""

    # ``src`` attribute of the ``img`` element.
    img = Attr("img", "src")
    step = Text("p")
Exemple #14
0
class Selected(Item):
    """Declarative field selectors for a selected entry."""

    # ``title`` and ``url`` both read the ``.name`` element: its text and
    # its ``href`` respectively.
    title = Text(".name")
    img = Attr("a > img", "src")
    url = Attr(".name", "href")