def download_subtitle(self, subtitle):
    """Download *subtitle* and store its data on ``subtitle.content``.

    Only :class:`Subs4FreeSubtitle` instances are handled; anything else is
    silently ignored. The provider serves a page with a hidden subtitle id
    and an image button; the real download is a POST that mimics a click at
    a random coordinate on that image.
    """
    if not isinstance(subtitle, Subs4FreeSubtitle):
        return

    # download the subtitle
    logger.info('Downloading subtitle %r', subtitle)
    response = self.session.get(subtitle.download_link,
                                headers={'Referer': subtitle.page_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    page = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])
    id_input = page.select_one('input[name="id"]')
    image_input = page.select_one('input[type="image"]')

    subtitle_id = id_input['value'] if id_input else None
    # image dimensions bound the fake click coordinates sent below;
    # the page reports them as e.g. '120px', hence the strip
    width = int(str(image_input['width']).strip('px')) if image_input else 0
    height = int(str(image_input['height']).strip('px')) if image_input else 0

    if not subtitle_id:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    # simulate clicking somewhere on the image button
    download_url = self.server_url + self.download_url
    response = self.session.post(download_url,
                                 data={'id': subtitle_id,
                                       'x': random.randint(0, width),
                                       'y': random.randint(0, height)},
                                 headers={'Referer': subtitle.download_link},
                                 timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # the payload may be an archive or a bare subtitle file
    archive = _get_archive(response.content)
    payload = _get_subtitle_from_archive(archive) if archive else response.content
    if payload:
        subtitle.content = fix_line_ending(payload)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def download_subtitle(self, subtitle):
    """Download *subtitle* and store its data on ``subtitle.content``.

    Only :class:`Subs4SeriesSubtitle` instances are handled; anything else
    is silently ignored. The download page either carries a direct anchor
    (``a.style55ws``) or a POST form whose ``action`` points at the file.
    """
    if not isinstance(subtitle, Subs4SeriesSubtitle):
        return

    # download the subtitle
    logger.info('Downloading subtitle %r', subtitle)
    response = self.session.get(subtitle.download_link,
                                headers={'Referer': subtitle.page_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    page = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # prefer the direct link; fall back to the POST form's action URL
    anchor = page.select_one('a.style55ws')
    if anchor:
        target = anchor['href']
    else:
        form = page.select_one('form[method="post"]')
        target = form['action'] if form else None

    if not target:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    download_url = self.server_url + target
    response = self.session.get(download_url,
                                headers={'Referer': subtitle.download_link},
                                timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # the payload may be an archive or a bare subtitle file
    archive = _get_archive(response.content)
    payload = _get_subtitle_from_archive(archive) if archive else response.content
    if payload:
        subtitle.content = fix_line_ending(payload)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def query(self, title):
    """Search the provider for *title* and return the matching subtitles.

    Runs the search, then fetches every result's detail page to collect
    the release title, year, IMDB id and download link.

    :param str title: title to search for.
    :return: list of found subtitles.

    Fixes over the previous version: the detail-page response is checked
    with ``raise_for_status()`` (an HTTP error page was previously parsed
    as if it were a result), the detail soup no longer shadows the search
    soup, the parsed title no longer shadows the ``title`` parameter, and
    result cells without a link are skipped instead of crashing.
    """
    r = self.session.get(self.search_url, params={'q': title}, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    subtitles = []
    # loop over subtitle cells
    for row in soup.select('.eBlock'):
        result_anchor_el = row.select_one('.eTitle > a')
        if not result_anchor_el:
            # malformed result cell: nothing to follow
            continue

        # page link
        page_link = result_anchor_el.get('href')

        # fetch/parse additional info
        r = self.session.get(page_link, timeout=10)
        r.raise_for_status()
        page_soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                        ['lxml', 'html.parser'])

        # title: last entry of the ' / '-separated header
        movie_titles_string = page_soup.select_one('.main-header').text.strip()
        result_title = movie_titles_string.split(' / ')[-1]

        # year
        year = page_soup.select_one('#film-page-year').text.strip()

        # imdb id
        imdb_link = page_soup.select_one('#actors-page > a').get('href')
        imdb_id = imdb_link.split('/')[-2]

        # download link
        download_link = self.server_url + page_soup.select_one('.hvr').get('href')

        # create/add the subtitle
        subtitle = self.subtitle_class(Language.fromalpha2('lv'), page_link,
                                       download_link, result_title, year, imdb_id)
        logger.debug('subtitri.id.lv: Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, movie_id, title, year):
    """Return the subtitles listed on a movie page.

    When *movie_id* is given its page is fetched directly; otherwise the
    search URL is built from *title* and *year*. The year attached to the
    returned subtitles is the one parsed from the page header, not the
    *year* argument.

    Fix: the page-parsed year is stored in ``year_num`` so it no longer
    shadows the ``year`` parameter (matching the sibling provider's query).
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + self.search_url.format(' '.join([title, str(year)]))

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # year as printed in the page header, if present
    year_num = None
    year_element = soup.select_one('td#dates_header > table div')
    matches = False
    if year_element:
        matches = year_re.match(str(year_element.contents[2]).strip())
    if matches:
        year_num = int(matches.group(1))

    title_tag = soup.select_one('td#dates_header > table u')
    show_title = str(title_tag.contents[0]).strip() if title_tag else None

    subtitles = []
    # loop over subtitle rows
    for subs_tag in soup.select('.movie-details'):
        # read common info
        version = subs_tag.find('span').text
        download_link = self.server_url + subs_tag.find('a')['href']
        uploader = subs_tag.select_one('.movie-info').find('p').find('a').text
        # language code is embedded in the sprite class name before 'gif'
        language_code = subs_tag.select_one('.sprite')['class'][1].split('gif')[0]
        language = Language.fromietf(language_code)

        subtitle = self.subtitle_class(language, page_link, show_title, year_num,
                                       version, download_link, uploader)
        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def query(self, movie_id, title, year):
    """Return the subtitles listed on a movie page.

    When *movie_id* is given its page is fetched directly; otherwise the
    search URL is built from *title* and *year*. The year attached to the
    returned subtitles is the one parsed from the page header (``year_num``),
    not the *year* argument.

    Fix: the loop variable no longer reuses the name ``subtitle`` for both
    the HTML tag and the constructed subtitle object; the tag is ``subs_tag``
    as in the sibling providers.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + text_type(self.search_url).format(
            ' '.join([title, str(year)]))

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # year as printed in the page header, if present
    year_num = None
    year_element = soup.select_one('td#dates_header > table div')
    matches = False
    if year_element:
        matches = year_re.match(str(year_element.contents[2]).strip())
    if matches:
        year_num = int(matches.group(1))

    title_element = soup.select_one('td#dates_header > table u')
    show_title = str(title_element.contents[0]).strip() if title_element else None

    subtitles = []
    # loop over subtitle rows
    for subs_tag in soup.select('table.table_border div[align="center"] > div'):
        # read common info
        version = subs_tag.find('b').text
        download_link = self.server_url + subs_tag.find('a')['href']
        # language code is the flag image's file name (without extension)
        language = Language.fromalpha2(
            subs_tag.find('img')['src'].split('/')[-1].split('.')[0])

        subtitle = self.subtitle_class(language, page_link, show_title, year_num,
                                       version, download_link)
        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def query(self, show_id, series, season, episode, title):
    """Fetch the episode page of *show_id* and return its subtitles.

    *show_id*, *season* and *episode* are all required; without them an
    empty list is returned immediately. The show title and year are read
    from the page header and attached to every subtitle.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', show_id)
    if not all((show_id, season, episode)):
        return []
    page_link = self.server_url + self.episode_link.format(
        show_id=show_id, season=season, episode=episode)

    response = self.session.get(page_link, timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # year of the show as printed in the page header (may be absent)
    header_div = soup.select_one('#dates_header_br > table div')
    matches = year_re.match(str(header_div.contents[2]).strip())
    year = int(matches.group(1)) if matches else None

    show_title = str(soup.select_one('#dates_header_br > table div u').string).strip()

    subtitles = []
    # loop over subtitle rows
    for row in soup.select('table .seeDark,.seeMedium'):
        bold_tags = row.find_all('b')
        version = bold_tags[0].text
        uploader = bold_tags[1].text
        download_link = self.server_url + row.find('a')['href']
        # language code is the flag image's file name (without extension)
        language = Language.fromalpha2(
            row.find('img')['src'].split('/')[-1].split('.')[0])

        subtitle = self.subtitle_class(language, page_link, show_title, year,
                                       version, download_link, uploader)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, show_id, series, season, episode, title):
    """Return subtitles from an episode or movie page of *show_id*.

    With *show_id*, *season* and *episode* the episode page is queried;
    with *show_id* and *title* the movie page is queried; otherwise an
    empty list is returned. A 404 from the provider also yields ``[]``.

    Improvement: the page-level metadata (series/season/episode/title and
    the language) is identical for every subtitle row, so it is now parsed
    once before the loop instead of once per row. The ``if rows:`` guard
    keeps the empty-result behavior exactly as before.
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', show_id)
    is_episode = False
    if all((show_id, season, episode)):
        is_episode = True
        page_link = self.server_url + self.episode_link.format(
            show_id=show_id, season=season, episode=episode)
    elif all((show_id, title)):
        page_link = self.server_url + self.movie_link.format(show_id)
    else:
        return []

    r = self.session.get(page_link, timeout=10)
    if r.status_code == 404:
        return []
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    year = None
    if not is_episode:
        year = int(soup.select_one('span.year').text)

    rows = soup.select('div[id="subtitles"] tr[data-id]')
    if rows:
        # page metadata is loop-invariant: parse it once
        language = Language.fromalpha2('el')
        if is_episode:
            # read the episode info
            episode_numbers = soup.select_one(
                '#summary-wrapper > div.container.summary span.main-title-sxe'
            ).text
            season = None
            episode = None
            matches = episode_re.match(episode_numbers.strip())
            if matches:
                season = int(matches.group(1))
                episode = int(matches.group(2))
            series = soup.select_one(
                '#summary-wrapper > div.summary h2 > a').string.strip()
            title = soup.select_one(
                '#summary-wrapper > div.container.summary span.main-title').text
        else:
            # read the movie info
            series = None
            season = None
            episode = None
            title = str(soup.select_one(
                '#summary-wrapper > div.summary h1').contents[0]).strip()

    subtitles = []
    # loop over subtitle rows
    for subs_tag in rows:
        version = subs_tag.find('td', {'class': 'name'}).text
        download_link = subs_tag.find(
            'a', {'class': 'btn-success'})['href'].strip('\'')

        subtitle = self.subtitle_class(language, page_link, series, season,
                                       episode, title, year, version,
                                       download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles