def _request(self, url, language=None, data=None):
    """Request ``url`` as HTML, using POST when ``data`` is supplied.

    Args:
        url: Address to request.
        language: Value handed to ``util.header`` to build the headers.
        data: Optional payload. When truthy the request is sent through
            ``self.config.post_html``; otherwise ``self.config.get_html``
            is used.

    Returns:
        Whatever the chosen ``self.config`` helper returns.
    """
    if self.config.trace_mode:
        logger.debug(f"URL: {url}")
    headers = util.header(language)
    if not data:
        return self.config.get_html(url, headers=headers)
    return self.config.post_html(url, data=data, headers=headers)
def get_list_description(self, tvdb_url):
    """Return the description text found on the list page at ``tvdb_url``.

    The page is requested with headers built from ``self.tvdb_language``.

    Args:
        tvdb_url: Address of the list page to read.

    Returns:
        The first matching paragraph text, or ``""`` when nothing matched
        or the first match is empty.
    """
    headers = util.header(self.tvdb_language)
    page = self.config.get_html(tvdb_url, headers=headers)
    paragraphs = page.xpath("//div[@class='block']/div[not(@style='display:none')]/p/text()")
    if len(paragraphs) > 0 and len(paragraphs[0]) > 0:
        return paragraphs[0]
    return ""
def get_list_description(self, list_url, language):
    """Return the ``og:description`` meta content of the page at ``list_url``.

    Args:
        list_url: Address of the list page to read.
        language: Value handed to ``util.header`` to build the headers.

    Returns:
        The first ``og:description`` value, or ``None`` when the tag is
        missing or its first value is empty.
    """
    if self.config.trace_mode:
        logger.debug(f"URL: {list_url}")
    page = self.config.get_html(list_url, headers=util.header(language))
    found = page.xpath("//meta[@property='og:description']/@content")
    if len(found) > 0 and len(found[0]) > 0:
        return found[0]
    return None
def _tmdb(self, letterboxd_url, language):
    """Extract the TMDb movie ID linked from a Letterboxd page.

    Args:
        letterboxd_url: Address of the Letterboxd page to read.
        language: Value handed to ``util.header`` to build the headers.

    Returns:
        The result of ``util.regex_first_int`` applied to the TMDb link.

    Raises:
        Failed: When the page has no usable TMDb link, or the link does
            not point at ``themoviedb.org/movie``.
    """
    if self.config.trace_mode:
        logger.debug(f"URL: {letterboxd_url}")
    page = self.config.get_html(letterboxd_url, headers=util.header(language))
    links = page.xpath("//a[@data-track-action='TMDb']/@href")
    # No link at all (or an empty href): report against the page URL.
    if len(links) == 0 or not links[0]:
        raise Failed(f"Letterboxd Error: TMDb Movie ID not found at {letterboxd_url}")
    tmdb_link = links[0]
    # A link exists but is not a movie link: report against the link itself.
    if "themoviedb.org/movie" not in tmdb_link:
        raise Failed(f"Letterboxd Error: TMDb Movie ID not found in {tmdb_link}")
    return util.regex_first_int(tmdb_link, "TMDb Movie ID")
def _ids_from_url(self, tvdb_url):
    """Collect the IDs of every entry on a TVDb list page.

    Series entries are resolved through ``self.get_series`` and recorded as
    ``(id, "tvdb")``. Movie entries are resolved through ``self.get_movie``
    and recorded as ``(tmdb_id, "tmdb")`` when a TMDb ID is present,
    otherwise ``(imdb_id, "imdb")`` when an IMDb ID is present. Entries that
    fail to resolve are logged and skipped.

    Args:
        tvdb_url: Address of the list; surrounding whitespace is stripped.

    Returns:
        A non-empty list of ``(id, source)`` tuples.

    Raises:
        Failed: When the URL does not start with an accepted list prefix,
            when the lookup raises ``requests.exceptions.MissingSchema``,
            or when no IDs were collected.
    """
    tvdb_url = tvdb_url.strip()
    if self.config.trace_mode:
        logger.debug(f"URL: {tvdb_url}")
    if not tvdb_url.startswith((urls["list"], urls["alt_list"])):
        raise Failed(f"TVDb Error: {tvdb_url} must begin with {urls['list']}")

    collected = []
    try:
        page = self.config.get_html(tvdb_url, headers=util.header(self.tvdb_language))
        rows = page.xpath("//div[@class='col-xs-12 col-sm-12 col-md-8 col-lg-8 col-md-pull-4']/div[@class='row']")
        for row in rows:
            title = row.xpath(".//div[@class='col-xs-12 col-sm-9 mt-2']//a/text()")[0]
            item_url = row.xpath(".//div[@class='col-xs-12 col-sm-9 mt-2']//a/@href")[0]
            if item_url.startswith("/series/"):
                try:
                    series = self.get_series(f"{base_url}{item_url}")
                    collected.append((series.id, "tvdb"))
                except Failed as e:
                    logger.error(f"{e} for series {title}")
            elif item_url.startswith("/movies/"):
                try:
                    movie = self.get_movie(f"{base_url}{item_url}")
                    if movie.tmdb_id:
                        collected.append((movie.tmdb_id, "tmdb"))
                    elif movie.imdb_id:
                        collected.append((movie.imdb_id, "imdb"))
                except Failed as e:
                    logger.error(e)
            else:
                logger.error(f"TVDb Error: Skipping Movie: {title}")
            # Pause after every entry before touching the next one.
            time.sleep(2)
    except requests.exceptions.MissingSchema:
        util.print_stacktrace()
        raise Failed(f"TVDb Error: URL Lookup Failed for {tvdb_url}")

    if collected:
        return collected
    raise Failed(f"TVDb Error: No TVDb IDs found at {tvdb_url}")
def _ids_from_url(self, imdb_url, language, limit):
    """Page through an IMDb listing and gather the title IDs it contains.

    The total result count and page size come from ``self._total``. Any
    ``start``/``count``/``page`` parameters already present in the URL are
    discarded and regenerated for each page. URLs under ``urls["searches"]``
    are paged with ``start``/``count``; everything else is paged with
    ``page``.

    Args:
        imdb_url: Address of the listing.
        language: Value handed to ``util.header`` to build the headers.
        limit: Maximum number of IDs wanted. A value below 1, or above the
            total, means "all results".

    Returns:
        A non-empty list of the ``data-tconst`` values found.

    Raises:
        Failed: When no IDs were found on any page.
    """
    total, item_count = self._total(imdb_url, language)
    headers = util.header(language)

    parsed_url = urlparse(imdb_url)
    params = parse_qs(parsed_url.query)
    imdb_base = parsed_url._replace(query=None).geturl()
    for paging_key in ("start", "count", "page"):
        params.pop(paging_key, None)  # noqa
    if self.config.trace_mode:
        logger.debug(f"URL: {imdb_base}")
        logger.debug(f"Params: {params}")
    search_url = imdb_base.startswith(urls["searches"])

    if limit < 1 or total < limit:
        limit = total
    # Size of the final page; a full page when the limit divides evenly.
    remainder = limit % item_count or item_count
    num_of_pages = math.ceil(int(limit) / item_count)

    imdb_ids = []
    for page in range(1, num_of_pages + 1):
        is_last_page = page == num_of_pages
        start_num = (page - 1) * item_count + 1
        end_num = limit if is_last_page else page * item_count
        logger.ghost(f"Parsing Page {page}/{num_of_pages} {start_num}-{end_num}")
        if search_url:
            params["count"] = remainder if is_last_page else item_count  # noqa
            params["start"] = start_num  # noqa
        else:
            params["page"] = page  # noqa
        response = self.config.get_html(imdb_base, headers=headers, params=params)
        ids_found = response.xpath("//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst")
        # Page-numbered listings cannot request a short page, so trim here.
        if is_last_page and not search_url:
            ids_found = ids_found[:remainder]
        imdb_ids.extend(ids_found)
        time.sleep(2)
    logger.exorcise()

    if not imdb_ids:
        raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")
    logger.debug(f"{len(imdb_ids)} IMDb IDs Found: {imdb_ids}")
    return imdb_ids
def _parse_list(self, list_url, language):
    """Gather ``(film id, film slug)`` pairs from a list page and its successors.

    When the page carries a "next" link, the method waits two seconds and
    calls itself on that link, appending those results after this page's.

    Args:
        list_url: Address of the list page to read.
        language: Value handed to ``util.header`` to build the headers.

    Returns:
        A list of ``(data-film-id, data-film-slug)`` tuples.
    """
    if self.config.trace_mode:
        logger.debug(f"URL: {list_url}")
    page = self.config.get_html(list_url, headers=util.header(language))
    film_ids = page.xpath("//li[contains(@class, 'poster-container')]/div/@data-film-id")
    # Each slug is looked up by its film id; the first match is used.
    items = [
        (film_id, page.xpath(f"//div[@data-film-id='{film_id}']/@data-film-slug")[0])
        for film_id in film_ids
    ]
    next_links = page.xpath("//a[@class='next']/@href")
    if next_links:
        time.sleep(2)
        items += self._parse_list(f"{base_url}{next_links[0]}", language)
    return items
def _total(self, imdb_url, language):
    """Read the total number of titles an IMDb URL reports.

    The xpath used to find the count and the page size returned with it both
    depend on which known prefix the URL starts with.

    Args:
        imdb_url: Address of the listing.
        language: Value handed to ``util.header`` to build the headers.

    Returns:
        A ``(total, per_page)`` tuple, with ``total`` greater than zero.

    Raises:
        Failed: When no positive title count could be parsed.
    """
    if imdb_url.startswith(urls["lists"]):
        xpath_total, per_page = "//div[@class='desc lister-total-num-results']/text()", 100
    elif imdb_url.startswith(urls["searches"]):
        xpath_total, per_page = "//div[@class='desc']/span/text()", 250
    else:
        xpath_total, per_page = "//div[@class='desc']/text()", 50

    page = self.config.get_html(imdb_url, headers=util.header(language))
    total = 0
    for text in page.xpath(xpath_total):
        if "title" not in text:
            continue
        # Thousands separators are removed before looking for "<n> title".
        match = re.search("(\\d+) title", text.replace(",", ""))
        if match is None:
            continue
        total = int(match.group(1))
        break

    if total > 0:
        return total, per_page
    raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")
def _request(self, url, language, xpath):
    """Fetch ``url`` and evaluate ``xpath`` against the returned page.

    Args:
        url: Address to request.
        language: Value handed to ``util.header`` to build the headers.
        xpath: Expression passed to the page's ``xpath`` method.

    Returns:
        The result of the xpath evaluation.
    """
    if self.config.trace_mode:
        logger.debug(f"URL: {url}")
    page = self.config.get_html(url, headers=util.header(language))
    return page.xpath(xpath)
def __init__(self, tvdb_url, language, is_movie, config):
    """Load a TVDb movie or series page and populate this object from it.

    Args:
        tvdb_url: Address of the TVDb page; surrounding whitespace is
            stripped before use.
        language: Preferred language for the title and summary; also handed
            to ``util.header`` to build the request headers.
        is_movie: Truthy to treat the page as a movie, falsy for a series.
        config: Object providing ``trace_mode`` and ``get_html``.

    Raises:
        Failed: When the URL does not start with an accepted prefix for the
            media type, when the TVDb ID or the title cannot be found on
            the page, or (movies only) when neither a TMDb ID nor an IMDb
            ID could be extracted.
    """
    self.tvdb_url = tvdb_url.strip()
    self.language = language
    self.is_movie = is_movie
    self.config = config

    # Pick the accepted URL prefixes for the requested media type.
    if self.is_movie:
        accepted_prefixes = (urls["movies"], urls["alt_movies"], urls["movie_id"])
        media_type = "Movie"
    else:
        accepted_prefixes = (urls["series"], urls["alt_series"], urls["series_id"])
        media_type = "Series"
    if not self.tvdb_url.startswith(accepted_prefixes):
        raise Failed(f"TVDb Error: {self.tvdb_url} must begin with {urls['movies'] if self.is_movie else urls['series']}")
    self.media_type = media_type

    if self.config.trace_mode:
        logger.debug(f"URL: {tvdb_url}")
    response = self.config.get_html(self.tvdb_url, headers=util.header(self.language))

    id_texts = response.xpath(f"//*[text()='TheTVDB.com {self.media_type} ID']/parent::node()/span/text()")
    if not id_texts:
        # The message depends on whether the URL was an ID-based lookup.
        if self.tvdb_url.startswith(urls["movie_id"]):
            raise Failed(f"TVDb Error: Could not find a TVDb Movie using TVDb Movie ID: {self.tvdb_url[len(urls['movie_id']):]}")
        if self.tvdb_url.startswith(urls["series_id"]):
            raise Failed(f"TVDb Error: Could not find a TVDb Series using TVDb Series ID: {self.tvdb_url[len(urls['series_id']):]}")
        raise Failed(f"TVDb Error: Could not find a TVDb {self.media_type} ID at the URL {self.tvdb_url}")
    self.id = int(id_texts[0])

    def parse_page(xpath):
        # First stripped result among the non-empty matches, or None.
        cleaned = [r.strip() for r in response.xpath(xpath) if len(r) > 0]
        return cleaned[0] if cleaned else None

    def parse_title_summary(lang=None):
        # With a language, select that translation block; without one,
        # select the block that is not hidden.
        selector = f"@data-language='{lang}']" if lang else "not(@style='display:none')]"
        place = "//div[@class='change_translation_text' and " + selector
        title = parse_page(f"{place}/@data-title")
        summary = parse_page(f"{place}/p/text()[normalize-space()]")
        return title, summary

    # Try the requested language, then its mapped alternative, then the
    # visible default translation.
    self.title, self.summary = parse_title_summary(lang=self.language)
    if not self.title and self.language in language_translation:
        self.title, self.summary = parse_title_summary(lang=language_translation[self.language])
    if not self.title:
        self.title, self.summary = parse_title_summary()
    if not self.title:
        raise Failed(f"TVDb Error: Name not found from TVDb URL: {self.tvdb_url}")

    self.poster_path = parse_page("//div[@class='row hidden-xs hidden-sm']/div/img/@src")
    self.background_path = parse_page("(//h2[@class='mt-4' and text()='Backgrounds']/following::div/a/@href)[1]")
    if self.is_movie:
        self.directors = parse_page("//strong[text()='Directors']/parent::li/span/a/text()[normalize-space()]")
        self.writers = parse_page("//strong[text()='Writers']/parent::li/span/a/text()[normalize-space()]")
        self.studios = parse_page("//strong[text()='Studio']/parent::li/span/a/text()[normalize-space()]")
    else:
        self.networks = parse_page("//strong[text()='Networks']/parent::li/span/a/text()[normalize-space()]")
    self.genres = parse_page("//strong[text()='Genres']/parent::li/span/a/text()[normalize-space()]")

    tmdb_id = None
    imdb_id = None
    if self.is_movie:
        tmdb_links = response.xpath("//*[text()='TheMovieDB.com']/@href")
        if tmdb_links:
            try:
                tmdb_id = util.regex_first_int(tmdb_links[0], "TMDb ID")
            except Failed:
                # An unparsable link is tolerated; the IMDb link may still work.
                pass
        imdb_links = response.xpath("//*[text()='IMDB']/@href")
        if imdb_links:
            try:
                imdb_id = util.get_id_from_imdb_url(imdb_links[0])
            except Failed:
                # Tolerated; the combined check below decides.
                pass
        if tmdb_id is None and imdb_id is None:
            raise Failed(f"TVDB Error: No TMDb ID or IMDb ID found for {self.title}")
    self.tmdb_id = tmdb_id
    self.imdb_id = imdb_id