Esempio n. 1
0
def get_soup_from_wiki_search(record: Record,
                              search_url: str) -> BeautifulSoup:
    """Fetch a wiki page, resolving a search-results page to a matching hit.

    If ``search_url`` does not contain ``search=`` the fetched page is
    returned as-is. Otherwise the page is scanned for
    ``li.mw-search-result`` entries and the first one whose link text
    contains ``record.title`` (case-insensitive) is fetched and returned.

    :param record: record whose ``title`` is matched against the result links.
    :param search_url: URL to fetch; ``'en.'`` in the URL selects the English
        wiki base for building the result URL, anything else the Dutch one.
    :return: soup of the resolved page, or ``None`` when the search page
        lists no matching result.
    """
    search_soup = make_soup(search_url)

    # Not a search query: the fetched page is already the target page.
    if 'search=' not in search_url:
        return search_soup

    results = search_soup.findAll('li', class_='mw-search-result')
    if not results:
        return None

    # Loop invariants: compute the needle and the base URL only once.
    wanted = record.title.lower()
    base_url = en_wiki_base if 'en.' in search_url else nl_wiki_base

    for result in results:
        link = result.find('a')
        # Skip malformed entries instead of crashing on a missing anchor/href.
        if link is None or not link.get('href'):
            continue
        if wanted in link.text.lower():
            return make_soup(base_url + link['href'])

    return None
Esempio n. 2
0
def get_page_url_from_title(title: str) -> str:
    """Look up ``title`` on the IMDB find page and return the first matching URL.

    The find page is fetched for the URL-quoted title. Within the
    ``findSection`` whose heading contains ``'Titles'``, the first table row
    whose ``result_text`` link text contains ``title`` (case-insensitive)
    is selected.

    :param title: title to search for.
    :return: ``imdb_base_url`` plus the row's first link target with any
        query string removed, or ``''`` when nothing matches.
    """
    wanted = title.lower()
    soup = make_soup(imdb_find_url + quote(title))

    if soup.find('div', class_='findNoResults') is not None:
        return ''

    # Locate the result section that lists titles (as opposed to other
    # result sections on the find page).
    titles_section = None
    for section in soup.findAll('div', class_='findSection'):
        heading = section.find('h3')
        if heading is not None and 'Titles' in heading.text:
            titles_section = section
            break
    if titles_section is None:
        return ''

    table = titles_section.find('table')
    if table is None:
        return ''

    for row in table.findAll('tr'):
        cell = row.find('td', class_='result_text')
        link = cell.find('a') if cell is not None else None
        # Rows without a result link cannot match; skip them rather than raise.
        if link is None or wanted not in link.text.strip().lower():
            continue

        # The URL is taken from the first anchor of the whole row (not
        # necessarily the result_text one); the query string is dropped.
        matching_url = imdb_base_url + row.find('a')['href'].split("?")[0]
        print("Parsing IMDB page: " + matching_url)
        return matching_url

    # Always return a str so every "not found" path yields the same value.
    return ''
Esempio n. 3
0
def get_movie_row(title, cast=None) -> Optional[MovieRow]:
    """Build a ``MovieRow`` for ``title`` from its IMDB page.

    The page URL is resolved with ``get_page_url_from_title``; when that
    yields nothing, ``None`` is returned. Year of production, cast and
    language are each filled in on a best-effort basis: a failure in one
    of them is reported on stdout and the remaining fields are still tried.

    :param title: title to look up.
    :param cast: cast value to store as-is; when empty or omitted the cast
        is parsed from the page instead.
    :return: the populated row, or ``None`` if no IMDB page was found.
    """
    page_url = get_page_url_from_title(title)
    if not page_url:
        return None

    page = make_soup(page_url)
    row = MovieRow(title)

    try:
        row.yop = get_year_of_production_from_soup(page)
    except Exception:
        print("Failed to parse Year of Production from movie page (IMDB)")

    if cast and len(cast):
        # A caller-supplied cast takes precedence over the scraped one.
        row.cast = cast
    else:
        try:
            row.cast = get_cast_list_string(page)
            print("Set cast: " + row.cast)
        except Exception:
            print("Failed to parse cast from movie page (IMDB)")

    try:
        row.language = get_language_from_soup(page)
    except Exception:
        print("Failed to parse broadcast language from movie page")

    return row
Esempio n. 4
0
def add_nl_info(record) -> bool:
    """Try to add schedule info to ``record`` from the NL wiki page for its title.

    :param record: record to enrich; its ``title`` is used to build the URL.
    :return: ``True`` if ``parse_wiki_page_nl`` reported success, ``False``
        otherwise (including when the page could not be fetched).
    """
    try:
        url = nl_wiki_base + title_to_search(record.title)
        soup = make_soup(url)

        # Same guard as add_en_info: no page means nothing to parse.
        if soup is None:
            return False

        parsed = bool(parse_wiki_page_nl(record, soup))
        if parsed:
            print(f"Added schedule info from (NL Wiki) : {url}  ")
        return parsed
    except URLError:
        print("No matching NL wiki page")
        # Honour the declared bool return type instead of falling through to None.
        return False
Esempio n. 5
0
def add_en_info(record) -> bool:
    """Try to add schedule info to ``record`` from the EN wiki page for its title.

    :param record: record to enrich; ``str(record.title)`` is used to build
        the URL.
    :return: ``True`` if ``parse_wiki_page_en`` reported success, ``False``
        otherwise (including when the page could not be fetched).
    """
    try:
        url = en_wiki_base + title_to_search(str(record.title))
        soup = make_soup(url)

        if soup is None:
            # Previously fell through and returned None here.
            return False

        parsed = bool(parse_wiki_page_en(record, soup))
        if parsed:
            print(f"Added schedule info from (EN Wiki) : {url}  ")
        return parsed
    except URLError:
        print("No matching EN wiki page")
        # Honour the declared bool return type instead of falling through to None.
        return False
Esempio n. 6
0
def add_schedule_info_to_record(record: Record) -> bool:
    """Add schedule info to ``record``, trying IMDB, then EN wiki, then NL wiki.

    Sources are tried in that order and the first one that succeeds wins;
    later sources are not consulted. An IMDB page being found counts as
    success regardless of what ``IMDB.add_schedule_info_to_record`` returns.

    :param record: record to enrich; its ``title`` drives every lookup.
    :return: ``True`` if a page was found and information was added,
        ``False`` otherwise.
    """
    imdb_page_url = IMDB.get_page_url_from_title(record.title)

    if imdb_page_url:
        page_soup = make_soup(imdb_page_url)
        IMDB.add_schedule_info_to_record(record, page_soup)
        return True

    # `or` short-circuits, so the NL wiki is only queried when the EN wiki
    # lookup did not add anything. bool() normalises a None result.
    return bool(WIKI.add_en_info(record) or WIKI.add_nl_info(record))
Esempio n. 7
0
def get_summary_detail_row(title, episode, season) -> Optional[SeriesDetailRow]:
    """Locate the IMDB season page for ``title`` and validate the season.

    Only the opening part of this function is visible in this snippet; the
    notes below cover that part alone.

    Returns ``None`` early when no IMDB page is found for the title, when
    the season page cannot be fetched or contains a
    'TV Episodes submission guide' link, or when ``season`` exceeds the
    number of options in the page's ``bySeason`` selector.
    """
    record_url = get_page_url_from_title(title)
    if not record_url:
        return None

    # The season episode guide is addressed relative to the title page URL.
    ep_guide_url = record_url + f'episodes?season={season}'
    print("Season URL: " + ep_guide_url)

    try:
        season_soup = make_soup(ep_guide_url)
        # NOTE(review): presumably this link only appears when IMDB has no
        # episode list for the title - confirm against a live page.
        if season_soup.find('a', text='TV Episodes submission guide') is not None:
            return None
    except Exception as e:
        # Any failure while fetching/inspecting the season page means "no data".
        return None

    try:
        # The number of <option> entries in the season selector is used as
        # the number of seasons available.
        if int(season) > len(season_soup.find('select', id='bySeason').findAll('option')):
            print(f"Season {season} of {title} not visible (IMDB)")
            return None
    except Exception as e:
        # Covers a non-numeric season as well as a missing selector.
        return None

    season_year = ''
Esempio n. 8
0
    season_year = ''
s
    # Best effort: take the year from the first listed episode's air date,
    # using the last space-separated token of the 'airdate' text.
    try:
        first_ep = season_soup.find('div', class_=list_item_regex)
        season_year = int(first_ep.find('div', class_='airdate').text.strip().split(" ")[-1])
    except Exception as e:
        # On any failure season_year keeps the value assigned before this block.
        pass

    # Stays None when the episode page cannot be located below.
    ep_soup = None

    try:
        # The string concatenation requires season and episode to be str;
        # otherwise the TypeError is caught by the handler below.
        print("Searching " + ep_guide_url + " For season: " + season + " episode: " + episode)
        episode_list = season_soup.find('div', class_='list detail eplist').findAll('div', class_=list_item_regex)
        # Episode numbers are treated as 1-based positions in the list.
        # NOTE(review): episode 0 yields index -1, i.e. the last entry,
        # rather than an IndexError - confirm that this is intended.
        episode_box = episode_list[int(episode) - 1]
        ep_soup = make_soup(imdb_base_url + episode_box.find('a')['href'])
    except IndexError as e:
        print(f"Episode {episode} not visible for title {title} : {e}")
    except (TypeError, AttributeError) as e:
        print(f"Error adding info from IMDB for record with title: {title} for ep {episode} season {season} ")
        print("Exception: " + str(e))

    # The summary row is built even when ep_soup is None.
    return get_series_summary_from_imdb_page_soup(ep_soup=ep_soup, episode=episode, season=season,
                                                  title=title, season_year=season_year)


def get_series_summary_from_imdb_page_soup(ep_soup, title, episode, season, season_year) -> SeriesDetailRow:

    s_row = SeriesDetailRow()
    s_row.title = title
    s_row.episode = episode