Esempio n. 1
0
def main():
    """Print the titles found on the piter.com "new" and "soon" collections.

    The work is done stage by stage: both pages are downloaded, then both are
    parsed, then titles are extracted from both, and only then is anything
    printed.
    """
    urls = (
        "https://www.piter.com/collection/new",
        "https://www.piter.com/collection/soon",
    )
    headings = ("=== new === ", "=== soon ===")
    # Each stage finishes for every page before the next stage starts.
    pages = [get_page(url) for url in urls]
    soups = [web(page, "html.parser") for page in pages]
    title_groups = [books_titles(soup) for soup in soups]
    for heading, titles in zip(headings, title_groups):
        print_titles(heading, titles)
Esempio n. 2
0
def get_books():
    """Collect titles from the piter.com "new" and "soon" collections.

    Returns:
        A dict with the keys "new" and "soon", each holding the result of
        books_titles() for the corresponding parsed page.
    """
    urls = (
        "https://www.piter.com/collection/new",
        "https://www.piter.com/collection/soon",
    )
    # Download everything first, then parse, then extract titles.
    pages = [get_page(url) for url in urls]
    soups = [web(page, "html.parser") for page in pages]
    new_titles, soon_titles = [books_titles(soup) for soup in soups]
    return {"new": new_titles, "soon": soon_titles}
Esempio n. 3
0
def get_books():
    """Collect titles from the williamspublishing.com new-books index.

    Returns:
        A dict with the single key "new" holding the result of
        books_titles() for the parsed page.
    """
    page = get_page("http://www.williamspublishing.com/indexns.shtml")
    titles = books_titles(web(page, "html.parser"))
    return {"new": titles}
Esempio n. 4
0
def get_books():
    """Collect titles from the bhv.ru new-books page.

    Returns:
        A dict with the single key "new" holding the first 15 items of
        whatever books_titles() returns for the parsed page.
    """
    page = get_page("http://www.bhv.ru/books/new.php")
    all_titles = books_titles(web(page, "html.parser"))
    # Only the first 15 entries are reported.
    return {"new": all_titles[:15]}
Esempio n. 5
0
 def grab(self, url):
     """Fetch `url` and extract the article title, image and paragraphs.

     Returns:
         A dict with "title" (stripped text of the first `.b-topic__title`
         element), "image" (the `src` of the first `.g-picture` element)
         and "content" (text of every `.b-text > p` element).

     Raises:
         IndexError: if the title or the image selector matches nothing.
     """
     # NOTE(review): this removes the literal "&nbsp" only, so a trailing
     # ";" from "&nbsp;" stays in the markup - confirm that is intended.
     html = self.get_content(url).replace("&nbsp", "")
     soup = web(html, "html.parser")
     return {
         "title": soup.select(".b-topic__title")[0].text.strip(),
         "image": soup.select(".g-picture")[0]["src"],
         "content": [paragraph.text for paragraph in soup.select(".b-text > p")],
     }
Esempio n. 6
0
def get_parsed_course(page):
    """Parse a course page and gather its fields into one dict.

    Args:
        page: Markup handed to `web(..., "html.parser")`.

    Returns:
        A dict with the keys "title", "start_date", "week_count",
        "avg_rating" and "language", each filled by the matching
        get_*() helper applied to the parsed page.
    """
    soup = web(page, "html.parser")
    return {
        "title": get_title(soup),
        "start_date": get_start_date(soup),
        "week_count": get_week_count(soup),
        "avg_rating": get_rating(soup),
        "language": get_language(soup),
    }
Esempio n. 7
0
def main():
    """Print the new, coming and printed title lists from dmkpress.com.

    The front page is fetched and parsed once; all three lists are extracted
    before the first one is printed.
    """
    soup = web(get_page("https://dmkpress.com/"), "html.parser")
    # Tuple items are evaluated in order, so extraction still runs as
    # new -> coming -> printed before any output happens.
    sections = (
        ("=== new ===", new_books_titles(soup)),
        ("=== coming === ", coming_titles(soup)),
        ("=== printed ===", book_print(soup)),
    )
    for heading, titles in sections:
        print_titles(heading, titles)
Esempio n. 8
0
def get_books():
    """Collect the coming and in-print title lists from dmkpress.com.

    Returns:
        A dict with "coming" (result of coming_titles()) and "printing"
        (result of book_print()) for the parsed front page.
    """
    soup = web(get_page("https://dmkpress.com/"), "html.parser")
    # book_print() runs before coming_titles(), although "coming" is the
    # first key in the returned dict.
    in_print = book_print(soup)
    upcoming = coming_titles(soup)
    return {"coming": upcoming, "printing": in_print}
Esempio n. 9
0
 def grab(self, url):
     """Fetch `url` and extract the article title and paragraphs, if present.

     The page is obtained through `self.get_content(url, self.site_encoding)`.

     Returns:
         A dict that contains "title" only when `.article_name` matches and
         "content" only when `.article_text_wrapper > p` matches; it may be
         empty.
     """
     soup = web(self.get_content(url, self.site_encoding), "html.parser")
     article = {}
     headings = soup.select(".article_name")
     if headings:
         article["title"] = headings[0].text
     paragraphs = soup.select(".article_text_wrapper > p")
     if paragraphs:
         article["content"] = [paragraph.text for paragraph in paragraphs]
     return article
Esempio n. 10
0
def books_scraping():
    """Print "author -> title" for each book block on the bhv.ru front page.

    Nothing is printed when the response status is not 200. The response is
    decoded as cp1251 before parsing.

    Raises:
        IndexError: if a book block lacks a `.bookAuthor` or
            `.bookTitle > a` element.
    """
    response = requests.get("http://bhv.ru")
    if response.status_code != 200:
        return

    # The encoding is set explicitly before the text is read.
    response.encoding = "cp1251"
    soup = web(response.text, "html.parser")
    entries = soup.select("#pageRightColumn > .infoBlock > .bookInfo")
    print("===Новинки издательства===\n")
    for entry in entries:
        print(
            entry.select(".bookAuthor")[0].getText(),
            "->",
            entry.select(".bookTitle > a")[0].getText(),
        )
Esempio n. 11
0
 def grab(self, url):
     """Fetch `url` and extract the article title, image and paragraphs.

     Returns:
         A dict with "title" (text of the first
         `.b-material-before-body__data > h1`), "image" (the `src` of the
         first `.b-material-incut-m-image > img`) and "content" (text of
         every `.js-mediator-article > p`).

     Raises:
         IndexError: if the title or the image selector matches nothing.
     """
     soup = web(self.get_content(url), "html.parser")
     heading = soup.select(".b-material-before-body__data > h1")[0]
     picture = soup.select(".b-material-incut-m-image > img")[0]
     paragraphs = soup.select(".js-mediator-article > p")
     return {
         "title": heading.text,
         "image": picture["src"],
         "content": [paragraph.text for paragraph in paragraphs],
     }
Esempio n. 12
0
 def grab(self, url):
     """Fetch `url` and extract the title, image URL and paragraphs, if present.

     The page is obtained through `self.get_content(url, self.site_encoding)`.

     Returns:
         A dict holding only the fields whose selector matched:
         "title" (text of the first `.textMTitle`), "image"
         ("https://www.interfax.ru/" followed by the `src` of the first
         `.inner > img`) and "content" (text of every `.at > article > p`).
         The dict is empty when nothing matched.
     """
     content = web(self.get_content(url, self.site_encoding), "html.parser")
     obj = {}
     titles = content.select(".textMTitle")
     # Test the result list, not its first element, so a page without a
     # title is skipped instead of raising IndexError.
     if titles:
         obj["title"] = titles[0].text
     images = content.select(".inner > img")
     if images:
         obj["image"] = "{}{}".format(
             "https://www.interfax.ru/",
             images[0]["src"]
         )
     paragraphs = content.select(".at > article > p")
     if paragraphs:
         obj["content"] = [paragraph.text for paragraph in paragraphs]
     return obj
Esempio n. 13
0
def parse_page(page, handler):
    """Parse `page` with the "html.parser" backend and pass it to `handler`.

    Args:
        page: Markup handed to `web(..., "html.parser")`.
        handler: Callable invoked with the parsed document.

    Returns:
        Whatever `handler` returns.
    """
    return handler(web(page, "html.parser"))
Esempio n. 14
0
def main():
    """Print the titles found on the bhv.ru new-books page."""
    page = get_page("http://www.bhv.ru/books/new.php")
    titles = books_titles(web(page, "html.parser"))
    print_titles("=== new === ", titles)
Esempio n. 15
0
# -*- coding: utf-8 -*-
"""
Destiny2 Weekly Reset Scraper
@author: MadMerlyn
"""

from urllib.request import urlopen
import sys
from bs4 import BeautifulSoup as web

# Load the LiveEvents submenu page and parse it with the lxml backend.
# ORIGINAL keeps a reference to the current stdout; it is not used in the
# lines visible here.
ORIGINAL = sys.stdout
HTML = urlopen('https://www.bungie.net/en/Explore/Category?category=LiveEvents')
SOUP = web(HTML, 'lxml')
# Grab all event links (anchors with class "explore-item"), turn their hrefs
# into absolute bungie.net URLs, then download and parse every linked page.
LINKS = SOUP.find_all('a', {'class':'explore-item'})
CRAWL = ['https://www.bungie.net'+x.get('href') for x in LINKS]
HTML2 = [urlopen(x) for x in CRAWL]
ACTIVITIES = [web(x, 'lxml') for x in HTML2]

# Scan every crawled page and pull details out of those whose main container
# text mentions "nightfall" (case-insensitive).
for item in ACTIVITIES:
    # NOTE(review): find() returns None when nothing matches, so a page
    # without #explore-container would raise AttributeError on the next line.
    act = item.find('div', {'id':'explore-container'})
    if 'nightfall' in act.get_text().lower():
        # NOTE(review): the second argument here is a set literal, not a dict
        # like the other lookups - presumably meant as a CSS class match;
        # confirm.
        event_dates = act.find('div', {'destiny-event-date-range'}).get_text().strip()
        # Title and subtitle of the quest activity block.
        div = act.find('div', {'data-identifier':'quest-activity-information'})
        title = div.find('div', {'class':'title'}).get_text()
        subtitle = div.find('div', {'class':'subtitle'}).get_text()
        # `div` is rebound to the list of modifier blocks; mods and mod_descs
        # are parallel lists of their titles and subtitles.
        div = act.findAll('div', {'data-identifier':'modifier-information'})
        mods = [x.find('div', {'class':'title'}).get_text() for x in div]
        mod_descs = [x.find('div', {'class':'subtitle'}).get_text() for x in div]
Esempio n. 16
0
def main():
    """Print the titles found on the williamspublishing.com new-books index."""
    page = get_page("http://www.williamspublishing.com/indexns.shtml")
    titles = books_titles(web(page, "html.parser"))
    print_titles("=== new === ", titles)