Beispiel #1
0
def parse_title(text, proxy=None):
    """Return the subject captured from *text*, converted via ``tools.to_utf8``.

    The subject is group 1 of the first ``r_subject`` match in *text*.  When
    the pattern does not match, the placeholder ``'unknow'`` is used instead.
    The encoding handed to ``tools.to_utf8`` is whatever
    ``tools.get_encoding(r_meta, text)`` reports.

    ``proxy`` is accepted but not used by this function.
    """
    charset = tools.get_encoding(r_meta, text)
    match = r_subject.search(text)
    # Fall back to the placeholder when the subject pattern is absent.
    subject = match.group(1) if match else 'unknow'
    return tools.to_utf8(subject, charset)
Beispiel #2
0
def parse_title(text, proxy=None):
    """Extract the page subject from *text* and convert it with ``tools.to_utf8``.

    Looks for ``r_subject`` in *text*; on a hit, group 1 is the subject,
    otherwise the literal ``'unknow'`` stands in for it.  The conversion uses
    the encoding returned by ``tools.get_encoding(r_meta, text)``.

    ``proxy`` is part of the signature but is not read here.
    """
    page_encoding = tools.get_encoding(r_meta, text)
    found = r_subject.search(text)
    if not found:
        # No subject in the page: return the converted placeholder.
        return tools.to_utf8('unknow', page_encoding)
    return tools.to_utf8(found.group(1), page_encoding)
Beispiel #3
0
def parse_index(text, proxy=None):
    """Yield ``(url, title)`` pairs for every ``r_index`` match in *text*.

    The URL is yielded exactly as captured.  The title has one space-like
    character normalised to a plain space, is stripped, and is then passed
    through ``tools.to_utf8`` using the encoding reported by
    ``tools.get_encoding(r_meta, text)``.

    This is a generator, so nothing (including the encoding lookup) runs
    until the first item is requested.  ``proxy`` is accepted but not used.
    """
    encoding = tools.get_encoding(r_meta, text)
    # A book id used to be looked up here (via r_bookid) to prefix each URL,
    # but the prefixing step was disabled.  The leftover lookup was dead code
    # that raised IndexError on pages without an id, so it has been removed
    # together with an unused accumulator list.
    for url, title in r_index.findall(text):
        # NOTE(review): the first literal is presumably a non-breaking or
        # otherwise special space - confirm the exact byte in the file.
        title = title.replace(' ', ' ').strip()
        yield url, tools.to_utf8(title, encoding)
Beispiel #4
0
def parse_index(text, proxy=None):
    """Generate ``(url, title)`` tuples from the index entries in *text*.

    Entries come from ``r_index.findall(text)``.  Each URL is passed through
    unchanged; each title gets one space-like character replaced by a plain
    space, is stripped, and is converted with ``tools.to_utf8`` using the
    encoding from ``tools.get_encoding(r_meta, text)``.

    Being a generator, the body does not execute until iteration starts.
    ``proxy`` is accepted but not used.
    """
    encoding = tools.get_encoding(r_meta, text)
    # The former r_bookid lookup only served a disabled "prefix the URL with
    # the book id" step, yet still raised IndexError when no id was present.
    # It is dropped here, as is a list that was created but never used.
    for url, title in r_index.findall(text):
        # NOTE(review): the first literal is presumably a non-breaking or
        # otherwise special space - confirm the exact byte in the file.
        title = title.replace(' ', ' ').strip()
        yield url, tools.to_utf8(title, encoding)
Beispiel #5
0
def parse_page(title, text, proxy=None):
    """Return the page as ``title``, two CRLF line breaks, then the content.

    If ``r_title`` matches *text*, the stripped group 1 (converted with
    ``tools.to_utf8``) replaces the *title* argument; otherwise *title* is
    used exactly as passed in.  The content is
    ``tools.format_html_text`` applied to group 1 of the ``r_content`` match,
    or an empty string when that pattern does not match.

    ``proxy`` is accepted but not used by this function.
    """
    charset = tools.get_encoding(r_meta, text)

    heading = r_title.search(text)
    if heading:
        # Prefer the title found in the page over the caller-supplied one.
        title = tools.to_utf8(heading.group(1).strip(), charset)

    section = r_content.search(text)
    content = tools.format_html_text(section.group(1), charset) if section else ''

    separator = '\r\n' * 2
    return title + separator + content
Beispiel #6
0
def parse_page(title, text, proxy=None):
    """Assemble one page: its title, two CRLF line breaks, and its body text.

    The *title* argument is overridden by the stripped, ``tools.to_utf8``
    converted group 1 of ``r_title`` when that pattern is found in *text*.
    The body is produced by ``tools.format_html_text`` from group 1 of the
    ``r_content`` match and defaults to ``''`` when there is no match.
    Both conversions use the encoding from
    ``tools.get_encoding(r_meta, text)``.

    ``proxy`` is part of the signature but is not read here.
    """
    page_encoding = tools.get_encoding(r_meta, text)

    title_match = r_title.search(text)
    if title_match:
        raw_title = title_match.group(1).strip()
        title = tools.to_utf8(raw_title, page_encoding)

    # Start from an empty body and fill it in only when content is present.
    body = ''
    content_match = r_content.search(text)
    if content_match:
        body = tools.format_html_text(content_match.group(1), page_encoding)

    return title + '\r\n' * 2 + body