예제 #1
0
 def _request(self, url, language=None, data=None):
     """Request ``url`` through the config object, POSTing when given a payload.

     The URL is written to the debug log when ``self.config.trace_mode`` is
     set. A truthy ``data`` is sent with ``self.config.post_html``; otherwise
     the page is requested with ``self.config.get_html``. Both calls receive
     the headers built by ``util.header(language)``, and the call's result is
     returned unchanged.
     """
     if self.config.trace_mode:
         logger.debug(f"URL: {url}")
     # No (or empty) payload: plain GET.
     if not data:
         return self.config.get_html(url, headers=util.header(language))
     return self.config.post_html(url, data=data, headers=util.header(language))
예제 #2
0
 def get_list_description(self, tvdb_url):
     """Return the first description paragraph text found at ``tvdb_url``.

     The page is requested with headers built from ``self.tvdb_language``.
     An empty string is returned when the XPath matches nothing or when its
     first match is empty.
     """
     page = self.config.get_html(tvdb_url,
                                 headers=util.header(self.tvdb_language))
     paragraphs = page.xpath(
         "//div[@class='block']/div[not(@style='display:none')]/p/text()")
     if not paragraphs:
         return ""
     first = paragraphs[0]
     return first if len(first) > 0 else ""
예제 #3
0
 def get_list_description(self, list_url, language):
     """Return the ``og:description`` meta content of ``list_url``, or None.

     The URL is written to the debug log when ``self.config.trace_mode`` is
     set. ``None`` is returned when the meta tag is absent or its content is
     empty.
     """
     if self.config.trace_mode:
         logger.debug(f"URL: {list_url}")
     page = self.config.get_html(list_url, headers=util.header(language))
     found = page.xpath("//meta[@property='og:description']/@content")
     if not found:
         return None
     first = found[0]
     return first if len(first) > 0 else None
예제 #4
0
 def _tmdb(self, letterboxd_url, language):
     """Return the TMDb movie ID linked from the page at ``letterboxd_url``.

     The page is searched for the ``href`` of anchors whose
     ``data-track-action`` attribute is ``TMDb``; only the first one is used.

     Raises:
         Failed: when no non-empty link is found on the page, or when the
             first link does not contain ``themoviedb.org/movie``.
     """
     if self.config.trace_mode:
         logger.debug(f"URL: {letterboxd_url}")
     page = self.config.get_html(letterboxd_url,
                                 headers=util.header(language))
     links = page.xpath("//a[@data-track-action='TMDb']/@href")
     # Nothing usable on the page at all.
     if not links or not links[0]:
         raise Failed(
             f"Letterboxd Error: TMDb Movie ID not found at {letterboxd_url}")
     tmdb_link = links[0]
     # A link exists but it is not a TMDb movie link.
     if "themoviedb.org/movie" not in tmdb_link:
         raise Failed(
             f"Letterboxd Error: TMDb Movie ID not found in {tmdb_link}")
     return util.regex_first_int(tmdb_link, "TMDb Movie ID")
예제 #5
0
 def _ids_from_url(self, tvdb_url):
     """Collect ``(id, source)`` tuples for every entry of a TVDb list page.

     Args:
         tvdb_url: URL of the list; surrounding whitespace is stripped. It
             must start with ``urls["list"]`` or ``urls["alt_list"]``.

     Returns:
         A non-empty list of tuples: ``(<series id>, "tvdb")`` for series
         rows, and ``(<tmdb id>, "tmdb")`` or ``(<imdb id>, "imdb")`` for
         movie rows.

     Raises:
         Failed: when the URL has the wrong prefix, when the request raises
             ``requests.exceptions.MissingSchema``, or when no IDs were
             collected from the page.
     """
     ids = []
     tvdb_url = tvdb_url.strip()
     if self.config.trace_mode:
         logger.debug(f"URL: {tvdb_url}")
     if tvdb_url.startswith((urls["list"], urls["alt_list"])):
         try:
             response = self.config.get_html(tvdb_url,
                                             headers=util.header(
                                                 self.tvdb_language))
             # One "row" div per list entry.
             items = response.xpath(
                 "//div[@class='col-xs-12 col-sm-12 col-md-8 col-lg-8 col-md-pull-4']/div[@class='row']"
             )
             for item in items:
                 # First anchor text / href inside the row's content column.
                 # NOTE(review): a row without such an anchor raises
                 # IndexError here, which is not caught below.
                 title = item.xpath(
                     ".//div[@class='col-xs-12 col-sm-9 mt-2']//a/text()"
                 )[0]
                 item_url = item.xpath(
                     ".//div[@class='col-xs-12 col-sm-9 mt-2']//a/@href")[0]
                 if item_url.startswith("/series/"):
                     try:
                         ids.append(
                             (self.get_series(f"{base_url}{item_url}").id,
                              "tvdb"))
                     except Failed as e:
                         # A failing entry is logged and skipped, not fatal.
                         logger.error(f"{e} for series {title}")
                 elif item_url.startswith("/movies/"):
                     try:
                         movie = self.get_movie(f"{base_url}{item_url}")
                         # Prefer the TMDb ID; fall back to the IMDb ID.
                         if movie.tmdb_id:
                             ids.append((movie.tmdb_id, "tmdb"))
                         elif movie.imdb_id:
                             ids.append((movie.imdb_id, "imdb"))
                     except Failed as e:
                         logger.error(e)
                 else:
                     # Link is neither a /series/ nor a /movies/ path.
                     logger.error(f"TVDb Error: Skipping Movie: {title}")
                 # Pause two seconds after every row, whatever its outcome.
                 time.sleep(2)
             if len(ids) > 0:
                 return ids
             # Only MissingSchema is caught below, so this Failed propagates.
             raise Failed(f"TVDb Error: No TVDb IDs found at {tvdb_url}")
         except requests.exceptions.MissingSchema:
             util.print_stacktrace()
             raise Failed(f"TVDb Error: URL Lookup Failed for {tvdb_url}")
     else:
         raise Failed(
             f"TVDb Error: {tvdb_url} must begin with {urls['list']}")
예제 #6
0
 def _ids_from_url(self, imdb_url, language, limit):
     """Page through an IMDb URL and collect the IMDb IDs it lists.

     Args:
         imdb_url: URL to read; any ``start``, ``count`` and ``page`` query
             parameters are dropped and re-generated per page.
         language: passed to ``util.header`` to build the request headers.
         limit: maximum number of IDs wanted; a value below 1, or above the
             total reported by ``self._total``, is replaced by that total.

     Returns:
         A non-empty list of the ``data-tconst`` values found on the pages.

     Raises:
         Failed: when no IDs were found (``self._total`` may raise too).
     """
     # total = number of titles reported; item_count = titles per page.
     total, item_count = self._total(imdb_url, language)
     headers = util.header(language)
     imdb_ids = []
     parsed_url = urlparse(imdb_url)
     params = parse_qs(parsed_url.query)
     # URL without its query string; the query is re-sent through ``params``.
     imdb_base = parsed_url._replace(query=None).geturl()
     params.pop("start", None)  # noqa
     params.pop("count", None)  # noqa
     params.pop("page", None)  # noqa
     if self.config.trace_mode:
         logger.debug(f"URL: {imdb_base}")
         logger.debug(f"Params: {params}")
     # True when the URL is a search; searches page with start/count.
     search_url = imdb_base.startswith(urls["searches"])
     if limit < 1 or total < limit:
         limit = total
     # Number of items wanted from the last page (a full page when the limit
     # is an exact multiple of the page size).
     remainder = limit % item_count
     if remainder == 0:
         remainder = item_count
     num_of_pages = math.ceil(int(limit) / item_count)
     for i in range(1, num_of_pages + 1):
         start_num = (i - 1) * item_count + 1
         # NOTE(review): logger.ghost / logger.exorcise are project helpers;
         # presumably a transient progress line and its removal - confirm.
         logger.ghost(
             f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if i == num_of_pages else i * item_count}"
         )
         if search_url:
             # Searches: request exactly the needed count on the last page.
             params[
                 "count"] = remainder if i == num_of_pages else item_count  # noqa
             params["start"] = start_num  # noqa
         else:
             params["page"] = i  # noqa
         response = self.config.get_html(imdb_base,
                                         headers=headers,
                                         params=params)
         ids_found = response.xpath(
             "//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst"
         )
         # Non-search pages cannot be sized by parameter, so the last page is
         # trimmed here to honour the limit.
         if not search_url and i == num_of_pages:
             ids_found = ids_found[:remainder]
         imdb_ids.extend(ids_found)
         # Pause two seconds after every page request.
         time.sleep(2)
     logger.exorcise()
     if len(imdb_ids) > 0:
         logger.debug(f"{len(imdb_ids)} IMDb IDs Found: {imdb_ids}")
         return imdb_ids
     raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")
예제 #7
0
 def _parse_list(self, list_url, language):
     """Return ``(film id, film slug)`` tuples for a list and its later pages.

     Film IDs are read from the poster containers of the page at
     ``list_url``; each slug is the first ``data-film-slug`` of a ``div``
     carrying that film ID. When the page has a ``next`` link, the method
     waits two seconds and appends the result of parsing that page.
     """
     if self.config.trace_mode:
         logger.debug(f"URL: {list_url}")
     page = self.config.get_html(list_url, headers=util.header(language))
     film_ids = page.xpath(
         "//li[contains(@class, 'poster-container')]/div/@data-film-id")
     items = [
         (film_id,
          page.xpath(
              f"//div[@data-film-id='{film_id}']/@data-film-slug")[0])
         for film_id in film_ids
     ]
     next_links = page.xpath("//a[@class='next']/@href")
     if len(next_links) == 0:
         return items
     # Pause before requesting the following page.
     time.sleep(2)
     items.extend(self._parse_list(f"{base_url}{next_links[0]}", language))
     return items
예제 #8
0
 def _total(self, imdb_url, language):
     """Return ``(total titles, titles per page)`` for ``imdb_url``.

     The XPath used to find the total and the page size depend on whether
     the URL starts with ``urls["lists"]`` (100 per page),
     ``urls["searches"]`` (250 per page) or neither (50 per page). The total
     is the first ``<number> title`` figure found in the matched text nodes,
     with thousands separators removed.

     Raises:
         Failed: when no positive total could be parsed.
     """
     if imdb_url.startswith(urls["lists"]):
         total_xpath = "//div[@class='desc lister-total-num-results']/text()"
         page_size = 100
     elif imdb_url.startswith(urls["searches"]):
         total_xpath = "//div[@class='desc']/span/text()"
         page_size = 250
     else:
         total_xpath = "//div[@class='desc']/text()"
         page_size = 50
     page = self.config.get_html(imdb_url, headers=util.header(language))
     count = 0
     for text in page.xpath(total_xpath):
         if "title" not in text:
             continue
         found = re.search("(\\d+) title", text.replace(",", ""))
         if found:
             # First text node carrying a number wins.
             count = int(found.group(1))
             break
     if count <= 0:
         raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")
     return count, page_size
예제 #9
0
 def _request(self, url, language, xpath):
     """Request ``url`` and return the result of evaluating ``xpath`` on it.

     The URL is written to the debug log when ``self.config.trace_mode`` is
     set; the request headers come from ``util.header(language)``.
     """
     if self.config.trace_mode:
         logger.debug(f"URL: {url}")
     page = self.config.get_html(url, headers=util.header(language))
     return page.xpath(xpath)
예제 #10
0
    def __init__(self, tvdb_url, language, is_movie, config):
        """Load a TVDb series or movie page and extract its metadata.

        Args:
            tvdb_url: URL of the TVDb page; surrounding whitespace is
                stripped before use.
            language: language code used for the request headers and for
                picking the translated title/summary.
            is_movie: True to treat the URL as a movie, False as a series.
            config: object providing ``trace_mode`` and ``get_html``.

        Sets ``id``, ``title``, ``summary``, ``poster_path``,
        ``background_path``, ``genres``, ``tmdb_id`` and ``imdb_id``; movies
        additionally get ``directors``, ``writers`` and ``studios``, series
        get ``networks``.

        Raises:
            Failed: when the URL prefix does not match the media type, when
                no TVDb ID or title is found on the page, or when a movie
                page yields neither a TMDb ID nor an IMDb ID.
        """
        self.tvdb_url = tvdb_url.strip()
        self.language = language
        self.is_movie = is_movie
        self.config = config
        # The URL prefix must agree with the requested media type.
        if not self.is_movie and self.tvdb_url.startswith(
            (urls["series"], urls["alt_series"], urls["series_id"])):
            self.media_type = "Series"
        elif self.is_movie and self.tvdb_url.startswith(
            (urls["movies"], urls["alt_movies"], urls["movie_id"])):
            self.media_type = "Movie"
        else:
            raise Failed(
                f"TVDb Error: {self.tvdb_url} must begin with {urls['movies'] if self.is_movie else urls['series']}"
            )

        if self.config.trace_mode:
            # NOTE(review): this logs the unstripped argument, while the
            # request below uses the stripped self.tvdb_url.
            logger.debug(f"URL: {tvdb_url}")
        response = self.config.get_html(self.tvdb_url,
                                        headers=util.header(self.language))
        # The numeric ID sits in a <span> next to the "TheTVDB.com ... ID"
        # label.
        results = response.xpath(
            f"//*[text()='TheTVDB.com {self.media_type} ID']/parent::node()/span/text()"
        )
        if len(results) > 0:
            self.id = int(results[0])
        # No ID on the page: choose the error message by URL form.
        elif self.tvdb_url.startswith(urls["movie_id"]):
            raise Failed(
                f"TVDb Error: Could not find a TVDb Movie using TVDb Movie ID: {self.tvdb_url[len(urls['movie_id']):]}"
            )
        elif self.tvdb_url.startswith(urls["series_id"]):
            raise Failed(
                f"TVDb Error: Could not find a TVDb Series using TVDb Series ID: {self.tvdb_url[len(urls['series_id']):]}"
            )
        else:
            raise Failed(
                f"TVDb Error: Could not find a TVDb {self.media_type} ID at the URL {self.tvdb_url}"
            )

        def parse_page(xpath):
            """Return the first non-empty XPath result, stripped, or None."""
            parse_results = response.xpath(xpath)
            if len(parse_results) > 0:
                # Empty results are dropped before stripping, so a
                # whitespace-only result survives as an empty string.
                parse_results = [
                    r.strip() for r in parse_results if len(r) > 0
                ]
            return parse_results[0] if len(parse_results) > 0 else None

        def parse_title_summary(lang=None):
            """Return ``(title, summary)`` for ``lang``, or for the visible
            translation block when ``lang`` is not given."""
            place = "//div[@class='change_translation_text' and "
            # The conditional expression is evaluated first, then appended:
            # place += (<language predicate> if lang else <visible predicate>)
            place += f"@data-language='{lang}']" if lang else "not(@style='display:none')]"
            return parse_page(f"{place}/@data-title"), parse_page(
                f"{place}/p/text()[normalize-space()]")

        # Title lookup order: requested language, its mapped code from
        # language_translation, then whichever translation is not hidden.
        self.title, self.summary = parse_title_summary(lang=self.language)
        if not self.title and self.language in language_translation:
            self.title, self.summary = parse_title_summary(
                lang=language_translation[self.language])
        if not self.title:
            self.title, self.summary = parse_title_summary()
        if not self.title:
            raise Failed(
                f"TVDb Error: Name not found from TVDb URL: {self.tvdb_url}")

        self.poster_path = parse_page(
            "//div[@class='row hidden-xs hidden-sm']/div/img/@src")
        self.background_path = parse_page(
            "(//h2[@class='mt-4' and text()='Backgrounds']/following::div/a/@href)[1]"
        )
        # parse_page returns a single value, so each attribute below holds
        # only the first matching entry (or None), despite the plural names.
        if self.is_movie:
            self.directors = parse_page(
                "//strong[text()='Directors']/parent::li/span/a/text()[normalize-space()]"
            )
            self.writers = parse_page(
                "//strong[text()='Writers']/parent::li/span/a/text()[normalize-space()]"
            )
            self.studios = parse_page(
                "//strong[text()='Studio']/parent::li/span/a/text()[normalize-space()]"
            )
        else:
            self.networks = parse_page(
                "//strong[text()='Networks']/parent::li/span/a/text()[normalize-space()]"
            )
        self.genres = parse_page(
            "//strong[text()='Genres']/parent::li/span/a/text()[normalize-space()]"
        )

        # External IDs are only looked up for movies; series keep both None.
        tmdb_id = None
        imdb_id = None
        if self.is_movie:
            results = response.xpath("//*[text()='TheMovieDB.com']/@href")
            if len(results) > 0:
                try:
                    tmdb_id = util.regex_first_int(results[0], "TMDb ID")
                except Failed:
                    # Best effort: an unparsable link leaves tmdb_id as None.
                    pass
            results = response.xpath("//*[text()='IMDB']/@href")
            if len(results) > 0:
                try:
                    imdb_id = util.get_id_from_imdb_url(results[0])
                except Failed:
                    # Best effort: an unparsable link leaves imdb_id as None.
                    pass
            # A movie needs at least one external ID.
            if tmdb_id is None and imdb_id is None:
                raise Failed(
                    f"TVDB Error: No TMDb ID or IMDb ID found for {self.title}"
                )
        self.tmdb_id = tmdb_id
        self.imdb_id = imdb_id