Example #1
import re

def crawl_top_circulations(rank_type, query):
    # get_pages, build_soup, Circulation and db are helpers/models defined
    # elsewhere in the project (not shown in this snippet).
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        # The table's summary attribute has the form "<year>年<tag>".
        year_tag = table.get('summary')
        year, tag = re.findall(r'(\d+)年(\S+)', year_tag)[0]
        for row in table.find_all('tr')[1:]:
            try:
                rk, title, ref, cnt = row.findChildren()
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = row.findChildren()
                ref = None

            circulation = Circulation(
                type=rank_type,
                bookname=title.text.strip(' /'),
                url=ref.get('href') if ref else None,
                rank=rk.text,
                tag=tag,
                year=year,
                count=cnt.text
            )
            db.session.add(circulation)
    db.session.commit()
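
The example above hinges on parsing the table's summary attribute with a regular expression. A minimal, self-contained sketch of that step (the sample string below is made up for illustration; the real attribute is whatever the library page emits):

import re

# Hypothetical summary value of the form "<year>年<tag>".
year_tag = '2003年中文圖書'

year, tag = re.findall(r'(\d+)年(\S+)', year_tag)[0]
print(year)  # 2003
print(tag)   # 中文圖書
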
Example #2
from urllib.parse import urljoin

def get_circulation_links():
    # get_pages, build_soup and nthu_library_url are project helpers
    # (not shown here). Each tuple pairs the raw link data with the
    # link resolved to an absolute URL.
    return [
        ({'text': a.text, 'href': a.get('href')},
         urljoin(nthu_library_url.top_circulations, a.get('href')))
        for resp in get_pages([
            nthu_library_url.top_circulations,
            nthu_library_url.top_circulations_bc2007])
        for a in build_soup(resp).find(id='cwrp').find_all('a')
    ]
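
Since this example builds absolute URLs with urljoin, here is a small standalone sketch of that resolution; the URLs are placeholders, not the library's real addresses:

from urllib.parse import urljoin

# Placeholder base page and relative href for illustration only.
base = 'https://example.org/library/top_circulations.php'
href = 'ranking.php?year=2010'

print(urljoin(base, href))
# https://example.org/library/ranking.php?year=2010
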
Example #3
def crawl_lost_objects(data):
    # post_page and build_soup are project helpers (not shown here);
    # `data` is the form payload posted to the lost-and-found search page.
    content = post_page(nthu_library_url.lost_found_url, data=data)
    lost_items = list()
    for item in build_soup(content).select('table > tr')[1:]:
        lost_items.append({
            'id': item.select('td:nth-of-type(1)')[0].text,
            'time': item.select('td:nth-of-type(2)')[0].text,
            'place': item.select('td:nth-of-type(3)')[0].text,
            'description': item.select('td:nth-of-type(4)')[0].text,
        })
    return lost_items
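
The row parsing relies on CSS :nth-of-type selectors. A self-contained sketch against a made-up two-row table (BeautifulSoup with the standard html.parser, which leaves <tr> as direct children of <table>):

from bs4 import BeautifulSoup

# Made-up markup mirroring the lost-and-found table layout.
html = '''
<table>
  <tr><th>ID</th><th>Time</th><th>Place</th><th>Description</th></tr>
  <tr><td>001</td><td>2020-01-02</td><td>2F</td><td>umbrella</td></tr>
</table>
'''

soup = BeautifulSoup(html, 'html.parser')
for item in soup.select('table > tr')[1:]:
    print(item.select('td:nth-of-type(1)')[0].text)  # 001
    print(item.select('td:nth-of-type(4)')[0].text)  # umbrella
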
Example #4
import re

def crawl_lost_objects(data):
    # Variant of the crawler above that also splits the description cell
    # into the free-text description and the record's system id.
    content = post_page(nthu_library_url.lost_found_url, data=data)
    lost_items = list()
    for item in build_soup(content).select('table > tr')[1:]:
        r = [s.strip()
             for s in item.select('td:nth-of-type(4)')[0].text.split('\r\n')]
        sysid = re.search(r'\d+', r[1])
        lost_items.append({
            'id': item.select('td:nth-of-type(1)')[0].text,
            'time': item.select('td:nth-of-type(2)')[0].text,
            'place': item.select('td:nth-of-type(3)')[0].text,
            'description': r[0],
            'system_id': sysid.group() if sysid else None
        })
    return lost_items
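
The extra step here splits the description cell on '\r\n' and pulls a numeric system id out of the second line. A standalone sketch with a fabricated cell value (the real cell text depends on the library page):

import re

# Fabricated cell text: free text on the first line, a record line
# containing the system id on the second.
cell_text = 'black umbrella\r\n  系統號 123456  '

r = [s.strip() for s in cell_text.split('\r\n')]
sysid = re.search(r'\d+', r[1])

print(r[0])                              # black umbrella
print(sysid.group() if sysid else None)  # 123456
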
Example #5
def crawl_top_circulations(query):
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        for row in table.find_all('tr')[1:]:
            try:
                rk, title, ref, cnt = row.findChildren()
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = row.findChildren()
                ref = None
            books.append({
                'rank': rk.text,
                'book_name': title.text.strip(' /'),
                'link': ref.get('href') if ref else None,
                'circulations': cnt.text
            })
        # Key each page's book list by the table's summary attribute.
        results[table.get('summary')] = books
    return results
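
The try/except around the unpacking is what tolerates rows with and without an <a> tag: findChildren() returns every descendant tag of the row, so a linked title yields four tags and a plain one yields three, raising ValueError on the first unpack. A minimal sketch with made-up rows:

from bs4 import BeautifulSoup

rows = BeautifulSoup('''
<table>
  <tr><td>1</td><td><a href="/book/1">A Title /</a></td><td>42</td></tr>
  <tr><td>2</td><td>Another Title /</td><td>17</td></tr>
</table>
''', 'html.parser').find_all('tr')

for row in rows:
    try:
        # four descendant tags: rank <td>, title <td>, <a>, count <td>
        rk, title, ref, cnt = row.findChildren()
    except ValueError:
        # only three tags when the title cell has no <a>
        rk, title, cnt = row.findChildren()
        ref = None
    print(rk.text, title.text.strip(' /'),
          ref.get('href') if ref else None, cnt.text)
# 1 A Title /book/1 42
# 2 Another Title None 17
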
Example #6
def crawl_top_circulations(query):
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        for row in table.find_all('tr')[1:]:
            try:
                rk, title, ref, cnt = row.findChildren()
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = row.findChildren()
                ref = None
            books.append({
                'rank': rk.text,
                'bookname': title.text.strip(' /'),
                'link': ref.get('href') if ref else None,
                'times': cnt.text
            })
        results[table.get('summary')] = books
    return results
Example #7
import re

def crawl_top_circulations(rank_type, query):
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        # The table's summary attribute has the form "<year>年<tag>".
        year_tag = table.get('summary')
        year, tag = re.findall(r'(\d+)年(\S+)', year_tag)[0]
        for row in table.find_all('tr')[1:]:
            try:
                rk, title, ref, cnt = row.findChildren()
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = row.findChildren()
                ref = None
            books.append({
                'type': rank_type,
                'book_name': title.text.strip(' /'),
                'url': ref.get('href') if ref else None,
                'rank': rk.text,
                'tag': tag,
                'year': year,
                'circulations': cnt.text
            })
        results[year_tag] = books
    return results