Beispiel #1
0

def get_cast(element):
    """Count container tags by their parent's markup and rank the parents.

    For every direct child of ``element`` whose tag name is one of ``table``,
    ``div``, ``td``, ``ul``, ``span`` or ``section``, each ``table``/``div``/
    ``td`` tag found in the module-level ``soup`` is tallied under the string
    form of its parent. The tallies live in the module-level ``cast_temp``
    mapping, so they accumulate across calls.

    Args:
        element: A parsed node exposing ``name`` and ``children``
            (presumably a BeautifulSoup tag -- confirm against ``reader``).

    Returns:
        A list of ``(parent_markup, count)`` pairs sorted by ascending count,
        or ``None`` when ``element.name`` is ``None``.
    """
    if element.name is None:
        return None
    for child in element.children:
        if child.name not in ("table", "div", "td", "ul", "span", "section"):
            continue
        # NOTE(review): the whole document is rescanned for each matching
        # child, so every count is scaled by the number of such children.
        # Kept as-is because the resulting ranking is what callers consume.
        for item in soup.find_all(["table", "div", "td"]):
            # Render the parent once; it is used as the dictionary key.
            key = str(item.parent)
            cast_temp[key] = cast_temp.get(key, 0) + 1
    return sorted(cast_temp.items(), key=operator.itemgetter(1))


def hasKey(hashMap, key):
    """Return whether ``key`` is among the entries yielded by ``hashMap``.

    Args:
        hashMap: The container to search; for a dict its keys are searched.
        key: The value to look for.

    Returns:
        ``True`` if an entry equal to ``key`` is present, otherwise ``False``.
    """
    try:
        # Membership test: constant time for dicts instead of a manual scan.
        return key in hashMap
    except TypeError:
        # An unhashable key cannot be hashed for a dict lookup; fall back to
        # comparing entry by entry so such calls still answer instead of
        # raising.
        return any(entry == key for entry in hashMap)


import os

# Read one site address per line from extractor/sites.txt. The path is built
# with os.path.join so it has no backslash escape and works on any platform.
with open(os.path.join("extractor", "sites.txt")) as f:
    lines = f.readlines()

# For each listed site: fetch it, rank the parents of its container tags with
# get_cast, and print the highest-ranked entry.
for site in lines:
    # readlines() keeps the trailing newline; drop it and skip blank lines.
    site = site.strip()
    if not site:
        continue
    soup = r.get_link(site)
    a = get_cast(soup.find("body"))
    # get_cast may yield None or an empty ranking; there is nothing to print.
    if a:
        print(a[-1])
Beispiel #2
0
Datei: imdb.py Projekt: rllima/RI
import reader as r

# Fetch one IMDb title page and print the fields extracted from it.
soup = r.get_link("http://www.imdb.com/title/tt0413573/?ref_=nv_sr_1")

# Headline fields are read from elements carrying itemprop attributes.
title = soup.find("h1", {"itemprop": "name"}).text
creator = soup.find("span", {"itemprop": "creator"}).text
print(title)
rating = soup.find("span", {"itemprop": "ratingValue"}).text
print(rating)

# The cast table is located once and queried for both listings below.
cast_list_table = soup.find("table", {"class": "cast_list"})

cast_table = cast_list_table.find_all("span", {"class": "itemprop"})
for name_span in cast_table:
    print(name_span.text)

character_list = cast_list_table.find_all("td", {"class": "character"})
for cell in character_list:
    # Print the text of the link inside the cell's first div.
    print(cell.find("div").a.text)

resume = soup.find("div", {"itemprop": "description"}).text
print(resume)

# Collect the text of every link inside the genre container.
genres = soup.find("div", {"itemprop": "genre"}).find_all("a")
genre = [link.text for link in genres]
print(genre)

details = soup.find("div", {"id": "titleDetails"}).find_all("div")
Beispiel #3
0
import reader as r


# Fetch one Rotten Tomatoes TV page and extract title, synopsis, genre row,
# cast (actor / character pairs) and the critic score.
soup = r.get_link("https://www.rottentomatoes.com/tv/lost_in_space/")
title = soup.find("h1", {"class": "title"}).text
resume = soup.find("div", {"id": "movieSynopsis"}).text
# The genre value is taken as the full text of the parent of the "Genre:" cell.
genre = soup.find("td", text="Genre:").parent.text
cast = soup.find_all("div", {"class": "cast-item media inlineBlock "})
cast_list = []
for item in cast:
    actor = item.find("div").find("a").text.strip()
    role_text = item.find(
        "span", {"class": "characters subtle smaller"}).text
    # Remove only the first "as " (presumably the label in front of the
    # character name -- confirm against the page markup). Removing every
    # occurrence would also damage names such as "Thomas Robinson".
    character = role_text.replace("as ", "", 1)
    cast_list.append([actor, character])
rate = soup.find("div", {"class": "critic-score meter"}).span.text


Beispiel #4
0
import reader as r

# Fetch one TMDb TV page and extract title, overview, creators, cast and
# the first listed genre.
soup = r.get_link("https://www.themoviedb.org/tv/1418-the-big-bang-theory")
title = soup.find("div", {"class": "title"}).text
resume = soup.find("div", {"class": "overview"}).text

# Each "profile" list item contributes its stripped text.
creator = soup.find_all("li", {"class": "profile"})
creator_list = [profile.text.strip() for profile in creator]

# Each cast list item is stripped and split into its individual lines.
people_scroller = soup.find("ol", {"class": "people scroller"})
cast = people_scroller.find_all("li")
cast_list = [entry.text.strip().split("\n") for entry in cast]

# Only the first list item of the genres section is read.
genres_section = soup.find("section", {"class": "genres right_column"})
genre = genres_section.find("li").text
Beispiel #5
0
import reader as r

# Fetch one TV Guide show page and extract title, description and cast.
soup = r.get_link("http://www.tvguide.com/tvshows/the-flash/644014/")

# Title and description both sit inside the masthead wrapper.
masthead = soup.find(
    "div", {"class": "tvobject-masthead-wrapper content-wrapper"})
title = masthead.find("h1").text.strip()
description_box = masthead.find(
    "div", {"class": "tvobject-masthead-description"})
resume = description_box.text.strip()

# Cast entries are the div elements under the "row" div of the cast section.
cast_section = soup.find("div", {"data-section-id": "cast"})
cast = cast_section.find("div", {"class": "row"}).find_all("div")
cast_list = [entry.text.strip() for entry in cast]
Beispiel #6
0
import reader as r

# Fetch one Trakt show page and extract title, country, language, genres,
# description, cast (name / character pairs) and the season count.
soup = r.get_link("https://trakt.tv/shows/the-big-bang-theory")

title_classes = (
    "col-md-10 col-md-offset-2 col-sm-9 col-sm-offset-3 mobile-title")
title = soup.find("div", {"class": title_classes}).text
country = soup.find("li", {"itemprop": "countryOfOrigin"}).text

# Labelled values: take the text of the label's parent and remove the label
# word itself from it.
language_row = soup.find("label", text="Language").parent
language = language_row.text.replace("Language", "")
genres_row = soup.find("label", text="Genres").parent
genre = genres_row.text.replace("Genres", "")
print(genre)

description = soup.find("div", {"itemprop": "description"}).text

# One [name, character] pair per list item marked as an actor.
list_actors = soup.find_all("li", {"itemprop": "actor"})
cast_list = [
    [
        entry.find("h4", itemprop="name").text,
        entry.find("h4", {"class": "character"}).text,
    ]
    for entry in list_actors
]
numberOfSeasons = soup.find("span", {"class": "season-count"}).text