def login(self):
    """Authenticate against the site and report success.

    Scrapes the login form from the login page (so hidden token inputs are
    preserved), posts the stored credentials, and treats the presence of a
    PHPSESSID cookie as proof of a valid session.

    :return: ``True`` when a session id was obtained, ``False`` otherwise.
    :rtype: bool
    """
    logger.info('Logging in')

    self.headers['Referer'] = self.site + '/index.php'
    self.session.headers.update(self.headers.items())
    response = self.session.get(self.loginpage)
    page = ParserBeautifulSoup(response.content, ['lxml'])

    # Carry over every form input (e.g. hidden session tokens) into the POST.
    fields = {tag.get('name'): tag.get('value') for tag in page.findAll('input')}
    fields['username'] = self.username
    fields['password'] = self.password
    fields['autologin'] = '******'
    fields['viewonline'] = 'on'

    self.headers['Referer'] = self.loginpage
    self.session.headers.update(self.headers.items())
    self.session.post(self.loginpage, fields)

    try:
        logger.debug('Got session id %s' % self.session.cookies.get_dict()['PHPSESSID'])
    except KeyError as e:
        # No PHPSESSID cookie -> the login did not take.
        logger.error(repr(e))
        logger.error("Didn't get session id, check your credentials")
        return False
    except Exception as e:
        logger.error(repr(e))
        logger.error('uncached error #legendasdivx #AA')
        return False
    return True
def _get_show_ids(self):
    """Fetch the provider's series listing and map sanitized titles to ids.

    :return: mapping of sanitized series name -> numeric show id, or an
        empty list when the provider returned no data.
    """
    # get the shows page
    logger.info('Getting show ids')
    response = self.session.get(self.server_url + self.all_series_url, timeout=10)
    response.raise_for_status()
    if not response.content:
        logger.debug('No data returned from provider')
        return []
    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # populate the show ids; only the TV-series category is of interest
    show_ids = {}
    for category in soup.findAll('seriesl'):
        if category.attrs['category'] != u'Σειρές':
            continue
        for entry in category.findAll('series'):
            name = entry.text
            match = series_sanitize_re.match(name)
            if match:
                name = match.group(1)
            show_ids[sanitize(name)] = int(entry['srsid'])
        break

    logger.debug('Found %d show ids', len(show_ids))
    return show_ids
def login(self):
    """Log in to legendasdivx.pt and cache the session cookies.

    Scrapes the login form (to pick up the hidden 'sid' field), posts the
    stored credentials, verifies a PHPSESSID cookie was issued, and stores
    a filtered copy of the session cookies in the cache region.

    :raises AuthenticationError: when no session id is returned.
    :raises IPAddressBlocked: when the server reports the IP as blocked.
    :raises TooManyRequests: on any other HTTP error.
    :raises ServiceUnavailable: on any unexpected failure.
    """
    logger.debug('Legendasdivx.pt :: Logging in')
    try:
        # sleep for 1 second before another request
        sleep(1)
        res = self.session.get(self.loginpage)
        res.raise_for_status()
        bsoup = ParserBeautifulSoup(res.content, ['lxml'])

        _allinputs = bsoup.findAll('input')
        data = {}
        # necessary to set 'sid' for POST request
        for field in _allinputs:
            data[field.get('name')] = field.get('value')

        # sleep for 1 second before another request
        sleep(1)
        data['username'] = self.username
        data['password'] = self.password
        res = self.session.post(self.loginpage, data)
        res.raise_for_status()

        # make sure we're logged in
        logger.debug(
            'Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
            self.session.cookies.get_dict()['PHPSESSID'])

        # keep only the cookies worth persisting
        cj = self.session.cookies.copy()
        store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k",
                     "phpbb3_2z8zs_u", "lang")
        for cn in iter(self.session.cookies.keys()):
            if cn not in store_cks:
                del cj[cn]

        # store session cookies on cache
        logger.debug(
            "Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
        region.set("legendasdivx_cookies2", cj)

    except KeyError:
        logger.error(
            "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
        )
        raise AuthenticationError(
            "Legendasdivx.pt :: Couldn't get session ID, check your credentials"
        )
    except HTTPError as e:
        if "bloqueado" in res.text.lower():
            logger.error(
                "LegendasDivx.pt :: Your IP is blocked on this server.")
            raise IPAddressBlocked(
                "LegendasDivx.pt :: Your IP is blocked on this server.")
        logger.error("Legendasdivx.pt :: HTTP Error %s", e)
        # BUG FIX: exceptions do not lazy-format like logging calls do;
        # passing (fmt, arg) made the exception message a tuple. Build the
        # message string explicitly instead.
        raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
        # Same lazy-format fix as above.
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)
def query(self, video, language):
    """Search the provider for subtitles matching *video*'s release name.

    :param video: the video whose filename drives the search.
    :param language: requested language(s); a single Portuguese variant is
        translated into the site's category filter.
    :return: list of :class:`LegendasdivxSubtitle` candidates.
    """
    # Reuse an existing session when possible; otherwise (re-)login and
    # bail out of this query.
    try:
        logger.debug('Got session id %s' % self.session.cookies.get_dict()['PHPSESSID'])
    except Exception:
        self.login()
        return []

    # Map a single requested language onto the site's category filter.
    # form_cat=28 = br
    # form_cat=29 = pt
    language_ids = '0'
    if isinstance(language, (tuple, list, set)) and len(language) == 1:
        language_ids = ','.join(sorted(l.opensubtitles for l in language))
        language_ids = '&form_cat=28' if language_ids == 'por' else '&form_cat=29'

    # Build the search string from the release filename.
    stem, _ = os.path.splitext(os.path.basename(video.name))
    videoname = stem
    search_terms = stem.lower().replace(".", "+").replace("[", "").replace("]", "")
    if language_ids != '0':
        search_terms = search_terms + language_ids

    self.headers['Referer'] = self.site + '/index.php'
    self.session.headers.update(self.headers.items())
    res = self.session.get(self.searchurl.format(query=search_terms))

    if "A legenda não foi encontrada" in res.text:
        logger.warning('%s not found', search_terms)
        return []

    page = ParserBeautifulSoup(res.content, ['html.parser'])
    subtitles = []
    lang = Language.fromopensubtitles("pob")
    for box in page.findAll("div", {"class": "sub_box"}):
        hits = 0
        for header in box.findAll("th", {"class": "color2"}):
            if header.string == 'Hits:':
                hits = int(header.parent.find("td").string)
            if header.string == 'Idioma:':
                flag_src = header.parent.find("td").find("img").get('src')
                lang = Language.fromopensubtitles(
                    'pob' if 'brazil' in flag_src else 'por')

        description = box.find("td", {"class": "td_desc brd_up"})
        download = box.find("a", {"class": "sub_download"})
        try:
            # sometimes BSoup just doesn't get the link
            logger.debug(download.get('href'))
        except Exception:
            logger.warning('skipping subbox on %s' % self.searchurl.format(query=search_terms))
            continue

        exact_match = video.name.lower() in description.get_text().lower()
        data = {
            'link': self.site + '/modules.php' + download.get('href'),
            'exact_match': exact_match,
            'hits': hits,
            'videoname': videoname,
            'description': description.get_text(),
        }
        subtitles.append(LegendasdivxSubtitle(lang, video, data))
    return subtitles
def query(self, languages=None, title=None, imdb_id=None, video=None):
    """Query the provider and return an ordered list of subtitles.

    :param languages: set of requested languages; the first one is attached
        to every produced subtitle.
    :param title: movie/series title used to build the query parameters.
    :param imdb_id: IMDB id used to build the query parameters.
    :param video: the video being searched; an Episode contributes its
        episode number.
    :return: subtitles ordered via ``self.order()``.
    """
    subtitles = []

    params = self.getQueryParams(imdb_id, title)

    search_response = self.session.post(self.api_url, data=params, timeout=15)
    search_response.raise_for_status()

    soup = ParserBeautifulSoup(
        search_response.content.decode('utf-8', 'ignore'),
        ['lxml', 'html.parser'])

    # loop over subtitle cells
    rows = soup.select('div[id="round"]')
    if len(rows) == 0:
        logger.debug('No data returned from provider')
        return []

    # release comments are outside of the parent for the sub details itself,
    # so we just map it to another list
    comment_rows = soup.findAll('div', attrs={
        'class': None,
        'id': None,
        'align': None
    })

    for index, row in enumerate(rows):
        result_anchor_el = row.select_one('.buton').select('a')

        # Download link
        href = result_anchor_el[0]['href']
        download_link = self.server_url + href

        fullTitle = row.select_one('#content-main a').text

        # BUG FIX: reset per-row fields before parsing so a failed parse on
        # one row can neither raise NameError below nor leak a value from
        # the previous row. (All the bare "except:" clauses also became
        # "except Exception:" so Ctrl-C / SystemExit are not swallowed.)
        uploader = None
        page_link = None

        # Get title
        try:
            title = fullTitle.split("(")[0]
        except Exception:
            logger.error("Error parsing title")

        # Get Uploader
        try:
            uploader = row.select('#content-main p')[4].text[10:]
        except Exception:
            logger.error("Error parsing uploader")

        # Get downloads count
        downloads = 0
        try:
            downloads = int(row.select_one('#content-right p').text[12:])
        except Exception:
            logger.error("Error parsing downloads")

        # Get year
        try:
            year = int(fullTitle.split("(")[1].split(")")[0])
        except Exception:
            year = None
            logger.error("Error parsing year")

        # Get imdbId
        sub_imdb_id = self.getImdbIdFromSubtitle(row)

        comments = ''
        try:
            comments = comment_rows[index].text
            logger.debug('Comments: {}'.format(comments))
        except Exception:
            logger.error("Error parsing comments")

        # Get Page Link
        try:
            page_link = row.select_one('#content-main a')['href']
        except Exception:
            logger.error("Error parsing page_link")

        episode_number = video.episode if isinstance(video, Episode) else None

        subtitle = self.subtitle_class(next(iter(languages)), download_link,
                                       index, comments, title, sub_imdb_id,
                                       uploader, page_link, year, downloads,
                                       isinstance(video, Episode),
                                       episode_number)
        logger.debug('Found subtitle %r', str(subtitle))
        subtitles.append(subtitle)

    ordered_subs = self.order(subtitles)
    return ordered_subs
def query(self, show_id, series, season, year=None, country=None):
    """Return Greek subtitles for one season of the given show.

    Resolves the provider's internal season id for *season*, fetches that
    season's subtitle listing, and builds one subtitle per (release,
    episode) pair that has actually been published.
    """
    # get the season list of the show
    logger.info('Getting the season list of show id %d', show_id)
    response = self.session.get(self.server_url + self.series_url.format(show_id),
                                timeout=10)
    response.raise_for_status()
    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])
    series = soup.find('name').text

    # resolve the provider's internal id for the requested season number
    season_id = None
    for season_row in soup.findAll('series_group'):
        try:
            if int(season_row['ssnnum']) == season:
                season_id = int(season_row['ssnid'])
                break
        except (ValueError, TypeError):
            continue

    if season_id is None:
        logger.debug('Season not found in provider')
        return []

    # get the subtitle list of the season
    logger.info('Getting the subtitle list of season %d', season)
    response = self.session.get(
        self.server_url + self.season_url.format(show_id=show_id, season=season_id),
        timeout=10)
    response.raise_for_status()
    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    subtitles = []
    # loop over episode rows
    for group in soup.findAll('subg'):
        # read the episode info
        episode_info = group.find('etitle')
        if episode_info is None:
            continue

        # an entry may cover one episode or a range (e.g. double episodes)
        episodes = []
        number_match = episode_re.match(episode_info['number'])
        if number_match:
            episodes = [int(num)
                        for num in (number_match.group(1), number_match.group(3))
                        if num]

        details = group.find('sgt')
        if details is None:
            continue

        season = int(details['ssnnum'])
        episode_id = int(details['epsid'])

        # filter out unreleased subtitles
        for release in group.findAll('sr'):
            if release['published_on'] == '':
                continue

            page_link = self.server_url + self.page_link.format(
                show_id=show_id, season_id=season_id, season=season,
                episode=episode_id)
            title = episode_info['title']
            version = release.fmt.text + ' ' + release.team.text
            download_link = self.server_url + self.download_link.format(
                int(release['rlsid']))

            for episode in episodes:
                subtitle = self.subtitle_class(Language.fromalpha2('el'),
                                               page_link, series, season,
                                               episode, year, title, version,
                                               download_link)
                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)

    return subtitles