Example #1
0
def wm_txt_parser(html, url, user_id):
    """Scrape one article page, store it, and record it for ``user_id``.

    The article id is whatever follows the last ``=`` in ``url`` (the whole
    URL when it contains no ``=``).  The remaining fields are cut out of
    ``html`` with ``txt_wrap_by`` using fixed start/end markers.  They are
    passed to ``wm_save``, and the ``id`` attribute of the object it returns
    is handed to ``wm_fav`` together with ``user_id``.

    Returns None.
    """
    article_id = url.rpartition('=')[2]

    title = txt_wrap_by('target="_blank">', '</a></p>', html)
    source_name = txt_wrap_by('">来自:', '<', html)

    # The link is searched for inside the <p class="info...> paragraph only,
    # not in the whole page.
    info_paragraph = txt_wrap_by('<p class="info', '</p>', html)
    source_link = txt_wrap_by('href="', '"', info_paragraph)

    like_count = txt_wrap_by('class="num-likeIt">', '人喜欢</a>', html)
    body = txt_wrap_by('<div class="content">', ' <p class="operating">', html)
    published = txt_wrap_by('<span class="time">', '</span>', html)

    saved = wm_save(
        article_id, like_count, title, source_name, source_link, published,
        body
    )
    wm_fav(user_id, saved.id)
Example #2
0
def wm_parser(html, url):
    """Parse a listing page and yield follow-up crawl tasks.

    This is a generator.  It yields tuples whose first element is the parser
    to run next and whose remaining elements are its arguments:

    * ``(wm_parser, next_url)`` -- at most once, when the page carries a
      positive ``maxActionTimeInMs`` value that is not already present in
      ``url`` as ``m=<value>``.  ``next_url`` is ``url`` up to and including
      its last ``=`` with that value appended.
    * ``(wm_txt_parser, article_url, user_id)`` -- for every item whose
      ``SpiderWm.get(wmid=...)`` lookup returns None.

    Items for which the lookup returns an object are not yielded; instead
    ``wm_fav(user_id, <object>.id)`` is called for them.
    """
    user = txt_wrap_by('&u=', '&', url)

    max_action_time = txt_wrap_by(
        '<li id="maxActionTimeInMs"  m="', '"', html
    )
    if (
        max_action_time
        and ('m=' + max_action_time) not in url
        and int(max_action_time) > 0
    ):
        # Keep everything up to and including the last '=' and swap in the
        # new value after it.
        prefix, equals_sign, _ = url.rpartition('=')
        yield wm_parser, prefix + equals_sign + str(max_action_time)

    # Resolved only after the paging task above has been handed out.
    user_id = wm_user_id(user)

    for item in txt_wrap_by_all(' itemid="', '<p class="operating">', html):
        if 'class="content"' not in item:
            continue

        # Each chunk starts right after ' itemid="', so the id is the text
        # up to the next double quote.
        item_id = item[:item.find('"')]

        known = SpiderWm.get(wmid=item_id)
        if known is not None:
            wm_fav(user_id, known.id)
            continue

        yield (
            wm_txt_parser,
            'http://www.wumii.com/reader/article?id=%s' % item_id,
            user_id,
        )
Example #3
0
def wm_parser(html, url):
    """Walk a listing page, yielding the next parser tasks to run.

    Generator.  Each yielded tuple is ``(parser, url[, extra_arg])``:

    * ``(wm_parser, paged_url)`` when the ``maxActionTimeInMs`` value found
      in ``html`` is non-empty, greater than zero, and ``'m=' + value`` does
      not already occur in ``url``.
    * ``(wm_txt_parser, article_url, user_id)`` for each item containing
      ``class="content"`` for which ``SpiderWm.get`` returns None.

    For items where ``SpiderWm.get`` returns an object, ``wm_fav`` is called
    with ``user_id`` and that object's ``id``, and nothing is yielded.
    """
    article_url_template = 'http://www.wumii.com/reader/article?id=%s'

    user = txt_wrap_by('&u=', '&', url)
    cursor = txt_wrap_by('<li id="maxActionTimeInMs"  m="', '"', html)

    if cursor:
        already_in_url = ('m=' + cursor) in url
        if not already_in_url and int(cursor) > 0:
            # Replace whatever follows the last '=' in the URL.
            cut = url.rfind('=') + 1
            paged_url = url[:cut] + str(cursor)
            yield wm_parser, paged_url

    user_id = wm_user_id(user)

    chunks = txt_wrap_by_all(' itemid="', '<p class="operating">', html)
    for chunk in chunks:
        if 'class="content"' in chunk:
            # The item id runs up to the first double quote in the chunk.
            end_of_id = chunk.find('"')
            wmid = chunk[:end_of_id]

            record = SpiderWm.get(wmid=wmid)
            if record is not None:
                wm_fav(user_id, record.id)
            else:
                yield wm_txt_parser, article_url_template % wmid, user_id
Example #4
0
def wm_txt_parser(html, url, user_id):
    """Extract an article's fields from ``html`` and save them.

    Args:
        html: markup of the article page.
        url: address the page came from; the text after its last ``=`` is
            used as the article id.
        user_id: passed through to ``wm_fav`` with the saved record's id.

    The fields are located with ``txt_wrap_by`` between fixed markers and
    forwarded to ``wm_save`` in the order
    ``(id, like, name, author, link, time, txt)``.  Returns None.
    """
    def cut(start, end, text=html):
        # Shorthand for the repeated "text between two markers" lookup.
        return txt_wrap_by(start, end, text)

    wmid = url.split('=')[-1]
    name = cut('target="_blank">', '</a></p>')
    author = cut('">来自:', '<')
    # Only the info paragraph is searched for the link.
    link = cut('href="', '"', cut('<p class="info', '</p>'))
    like = cut('class="num-likeIt">', '人喜欢</a>')
    txt = cut('<div class="content">', ' <p class="operating">')
    posted_at = cut('<span class="time">', '</span>')

    record = wm_save(wmid, like, name, author, link, posted_at, txt)
    wm_fav(user_id, record.id)