def test_save_subtitles(movies, tmpdir, monkeypatch):
    """save_subtitles skips empty subtitles and writes one file per language.

    Fix: read saved files with context managers instead of leaking the
    handles returned by bare ``io.open(...).read()``.
    """
    monkeypatch.chdir(str(tmpdir))
    tmpdir.ensure(movies['man_of_steel'].name)

    # one subtitle with no content, two French ones, one Brazilian Portuguese
    subtitle_no_content = Subtitle(Language('eng'))
    subtitle = Subtitle(Language('fra'))
    subtitle.content = b'Some content'
    subtitle_other = Subtitle(Language('fra'))
    subtitle_other.content = b'Some other content'
    subtitle_pt_br = Subtitle(Language('por', 'BR'))
    subtitle_pt_br.content = b'Some brazilian content'
    subtitles = [subtitle_no_content, subtitle, subtitle_other, subtitle_pt_br]

    save_subtitles(movies['man_of_steel'], subtitles)

    # compute the video stem once instead of repeating the splitext call
    stem = os.path.splitext(movies['man_of_steel'].name)[0]

    # subtitle without content is skipped
    assert not os.path.exists(os.path.join(str(tmpdir), stem + '.en.srt'))

    # first subtitle with a given language is saved; the duplicate is dropped
    fr_path = os.path.join(str(tmpdir), stem + '.fr.srt')
    assert os.path.exists(fr_path)
    with io.open(fr_path, 'rb') as f:
        assert f.read() == b'Some content'

    # IETF language tag (pt-BR) appears in the path
    br_path = os.path.join(str(tmpdir), stem + '.pt-BR.srt')
    assert os.path.exists(br_path)
    with io.open(br_path, 'rb') as f:
        assert f.read() == b'Some brazilian content'
def test_subtitle_is_valid_valid(monkeypatch):
    """A well-formed SRT cue makes the subtitle valid."""
    sub = Subtitle(Language('fra'))
    srt_text = ('1\n'
                '00:00:20,000 --> 00:00:24,400\n'
                "En réponse à l'augmentation de la criminalité\n"
                'dans certains quartiers,\n')
    monkeypatch.setattr(Subtitle, 'text', srt_text)
    assert sub.is_valid() is True
def test_subtitle_text_guess_encoding_none():
    """When the encoding cannot be guessed the subtitle is invalid and text stays raw."""
    raw = b'\x00d\x00\x80\x00\x00\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00d\x00d\x00\x00\x02s\x84\x8f\xa9'
    sub = Subtitle(Language('zho'), False, None, None)
    sub.content = raw
    assert sub.guess_encoding() is None
    assert not sub.is_valid()
    assert not isinstance(sub.text, six.text_type)
def test_subtitle_is_valid_invalid(monkeypatch):
    """Stray text after the final cue renders the subtitle invalid."""
    sub = Subtitle(Language('fra'))
    srt_text = ('1\n'
                '00:00:20,000 --> 00:00:24,400\n'
                "En réponse à l'augmentation de la criminalité\n"
                'dans certains quartiers,\n\n')
    srt_text += "This line shouldn't be here"
    monkeypatch.setattr(Subtitle, 'text', srt_text)
    assert sub.is_valid() is False
def test_save_subtitles_single_directory_encoding(movies, tmpdir):
    """With single=True only the first subtitle is saved, re-encoded as requested.

    Fix: read the saved file with a context manager instead of leaking the
    handle returned by a bare ``io.open(...).read()``.
    """
    subtitle = Subtitle(Language('jpn'))
    subtitle.content = u'ハローワールド'.encode('shift-jis')
    subtitle_pt_br = Subtitle(Language('por', 'BR'))
    subtitle_pt_br.content = b'Some brazilian content'
    subtitles = [subtitle, subtitle_pt_br]

    save_subtitles(movies['man_of_steel'], subtitles, single=True,
                   directory=str(tmpdir), encoding='utf-8')

    # first subtitle only, transcoded from shift-jis to utf-8, no language suffix
    path = os.path.join(
        str(tmpdir),
        os.path.splitext(os.path.split(movies['man_of_steel'].name)[1])[0] + '.srt')
    assert os.path.exists(path)
    with io.open(path, encoding='utf-8') as f:
        assert f.read() == u'ハローワールド'
def query(self, name, season, episode, release_group, filepath, languages=None):
    """Scrape the provider site for completed subtitles of an episode.

    Builds a ``/serie/<name>/<season>/<episode>/<name>`` search URL, parses
    the result page with BeautifulSoup, and keeps only rows whose release
    team intersects ``release_group``, whose language is in ``languages``
    (when given) and whose status is 'Completed'.  Returns a list of
    Subtitle results sorted with ``self._cmpReleaseGroup``.

    NOTE(review): Python 2 code (``unicode``, ``urllib2``, ``list.sort(cmp)``).
    Network/parse failures return an empty list rather than raising.
    """
    # Site URLs use underscores instead of spaces in the series name.
    searchname = name.lower().replace(' ', '_')
    if isinstance(searchname, unicode):
        searchname = searchname.encode('utf-8')
    searchurl = '%s/serie/%s/%s/%s/%s' % (
        self.server_url, urllib2.quote(searchname), season, episode,
        urllib2.quote(searchname))
    self.logger.debug(u'Searching in %s' % searchurl)
    try:
        req = urllib2.Request(searchurl,
                              headers={'User-Agent': self.user_agent})
        page = urllib2.urlopen(req, timeout=self.timeout)
    except urllib2.HTTPError as inst:
        self.logger.info(u'Error: %s - %s' % (searchurl, inst))
        return []
    except urllib2.URLError as inst:
        self.logger.info(u'TimeOut: %s' % inst)
        return []
    soup = BeautifulSoup(page.read())
    sublinks = []
    for html_sub in soup('td', {'class': 'NewsTitle', 'colspan': '3'}):
        if not self.release_pattern.match(str(html_sub.contents[1])):
            # On not needed soup td result
            continue
        # Extract the release team name and split it on common separators.
        sub_teams = self.listTeams(
            [self.release_pattern.match(
                str(html_sub.contents[1])).groups()[0].lower()],
            ['.', '_', ' ', '/', '-'])
        if not release_group.intersection(sub_teams):  # On wrong team
            continue
        html_language = html_sub.findNext('td', {'class': 'language'})
        sub_language = self.getRevertLanguage(
            html_language.contents[0].strip().replace(' ', ''))
        if languages and not sub_language in languages:  # On wrong language
            continue
        html_status = html_language.findNextSibling('td')
        sub_status = html_status.find('b').string.strip()
        if not sub_status == 'Completed':  # On not completed subtitles
            continue
        # The download link lives in the next colspan=3 cell after the status.
        sub_link = self.server_url + html_status.findNextSibling(
            'td', {'colspan': '3'}).find('a')['href']
        self.logger.debug(u'Found a match with teams: %s' % sub_teams)
        result = Subtitle(filepath,
                          self.getSubtitlePath(filepath, sub_language),
                          self.__class__.__name__, sub_language, sub_link,
                          keywords=sub_teams)
        sublinks.append(result)
    # Python 2 cmp-style sort: best release-group matches first.
    sublinks.sort(self._cmpReleaseGroup)
    return sublinks
def test_compute_subtitle_path__multi_with_und_language(monkeypatch):
    """In multi mode an undetermined language yields a plain .srt beside the video."""
    # Given
    monkeypatch.setattr(app, 'SUBTITLES_MULTI', True)
    und_subtitle = Subtitle(language=Language('und'))

    # When
    result = sut.compute_subtitle_path(und_subtitle, '/folder/subfolder/video.mkv', None)

    # Then
    assert result == '/folder/subfolder/video.srt'
def test_compute_subtitle_path__single_with_valid_language_and_subs_folder(monkeypatch):
    """In single mode the subtitle lands in the subtitles folder without a language tag."""
    # Given
    monkeypatch.setattr(app, 'SUBTITLES_MULTI', False)
    br_subtitle = Subtitle(language=Language('por', 'BR'))

    # When
    result = sut.compute_subtitle_path(br_subtitle, '/folder/subfolder/video.mkv',
                                       '/folder/subtitles')

    # Then
    assert os.path.normpath(result) == os.path.normpath('/folder/subtitles/video.srt')
def test_compute_subtitle_path__single_with_valid_language(monkeypatch):
    """In single mode the subtitle is saved beside the video without a language tag.

    Fix: patch ``SUBTITLES_MULTI`` on the ``app`` module — the object the
    SUT reads — as the sibling tests do, instead of the string target
    ``'medusa.SUBTITLES_MULTI'`` which sets the attribute on the package,
    not on ``app``.
    """
    # Given
    monkeypatch.setattr(app, 'SUBTITLES_MULTI', False)
    subtitle = Subtitle(language=Language('por', 'BR'))
    video_path = '/folder/subfolder/video.mkv'
    subtitles_dir = None

    # When
    actual = sut.compute_subtitle_path(subtitle, video_path, subtitles_dir)

    # Then
    assert '/folder/subfolder/video.srt' == actual
def test_save_subtitles(movies, tmpdir, monkeypatch):
    """save_subtitles skips empty subtitles and writes one file per language.

    Fix: read saved files with context managers instead of leaking the
    handles returned by bare ``io.open(...).read()``.
    """
    monkeypatch.chdir(str(tmpdir))
    tmpdir.ensure(movies['man_of_steel'].name)

    # one subtitle with no content, two French ones, one Brazilian Portuguese
    subtitle_no_content = Subtitle(Language('eng'))
    subtitle = Subtitle(Language('fra'))
    subtitle.content = b'Some content'
    subtitle_other = Subtitle(Language('fra'))
    subtitle_other.content = b'Some other content'
    subtitle_pt_br = Subtitle(Language('por', 'BR'))
    subtitle_pt_br.content = b'Some brazilian content'
    subtitles = [subtitle_no_content, subtitle, subtitle_other, subtitle_pt_br]

    save_subtitles(movies['man_of_steel'], subtitles)

    # compute the video stem once instead of repeating the splitext call
    stem = os.path.splitext(movies['man_of_steel'].name)[0]

    # subtitle without content is skipped
    assert not os.path.exists(os.path.join(str(tmpdir), stem + '.en.srt'))

    # first subtitle with a given language is saved; the duplicate is dropped
    fr_path = os.path.join(str(tmpdir), stem + '.fr.srt')
    assert os.path.exists(fr_path)
    with io.open(fr_path, 'rb') as f:
        assert f.read() == b'Some content'

    # IETF language tag (pt-BR) appears in the path
    br_path = os.path.join(str(tmpdir), stem + '.pt-BR.srt')
    assert os.path.exists(br_path)
    with io.open(br_path, 'rb') as f:
        assert f.read() == b'Some brazilian content'
def query(self, name, season, episode, keywords, filepath, languages): sublinks = [] # get the show id show_name = name.lower() if show_name in self.exceptions: # get it from exceptions show_id = self.exceptions[show_name] elif show_name in self.showids: # get it from cache show_id = self.showids[show_name] else: # retrieve it show_name_encoded = show_name if isinstance(show_name_encoded, unicode): show_name_encoded = show_name_encoded.encode('utf-8') show_id_url = '%sGetShowByName/%s' % (self.server_url, urllib2.quote(show_name_encoded)) self.logger.debug(u'Retrieving show id from web at %s' % show_id_url) page = urllib2.urlopen(show_id_url) dom = minidom.parse(page) if not dom or len(dom.getElementsByTagName('showid')) == 0: # no proper result page.close() return [] show_id = dom.getElementsByTagName('showid')[0].firstChild.data self.showids[show_name] = show_id with self.lock: f = open(self.showid_cache, 'w') self.logger.debug(u'Writing showid %s to cache file' % show_id) pickle.dump(self.showids, f) f.close() page.close() # get the subs for the show id we have for language in languages: subs_url = '%sGetAllSubsFor/%s/%s/%s/%s' % (self.server_url, show_id, season, episode, language) self.logger.debug(u'Getting subtitles at %s' % subs_url) page = urllib2.urlopen(subs_url) dom = minidom.parse(page) page.close() for sub in dom.getElementsByTagName('result'): sub_filename = sub.getElementsByTagName('filename')[0].firstChild.data if not sub_filename.endswith(tuple(EXTENSIONS)): sub_filename = sub_filename + EXTENSIONS[0] subtitle = Subtitle.factory(sub_filename) subtitle.link = sub.getElementsByTagName('downloadlink')[0].firstChild.data subtitle.path = self.getSubtitlePath(filepath, language) subtitle.plugin = self.__class__.__name__ subtitle.language = language sublinks.append(subtitle) sublinks.sort(self._cmpReleaseGroup) return sublinks
def query(self, filepath, moviehash, languages):
    """Query the provider by movie hash and return matching Subtitles.

    The service answers a ``?action=search&hash=...`` request with a single
    comma-separated line of available language codes; one Subtitle is built
    per code that is also in ``languages``, pointing at the corresponding
    download URL.

    NOTE(review): Python 2 code (``urllib2``); a 404 means "no result" and
    yields an empty list, as do other network errors.
    """
    searchurl = '%s/?action=%s&hash=%s' % (self.server_url, 'search', moviehash)
    self.logger.debug(u'Query URL: %s' % searchurl)
    try:
        req = urllib2.Request(searchurl,
                              headers={'User-Agent': self.user_agent})
        page = urllib2.urlopen(req, timeout=self.timeout)
    except urllib2.HTTPError as inst:
        if inst.code == 404:  # no result found
            return []
        self.logger.error(u'Error: %s - %s' % (searchurl, inst))
        return []
    except urllib2.URLError as inst:
        self.logger.error(u'TimeOut: %s' % inst)
        return []
    # First (and only) response line: comma-separated language codes.
    available_languages = page.readlines()[0].split(',')
    self.logger.debug(u'Available languages: %s' % available_languages)
    subs = []
    for l in available_languages:
        if l in languages:
            result = Subtitle(filepath, self.getSubtitlePath(filepath, l),
                              self.__class__.__name__, l,
                              '%s/?action=download&hash=%s&language=%s'
                              % (self.server_url, moviehash, l))
            subs.append(result)
    return subs
def get_results(self, token, search, filepath):
    """Run one SearchSubtitles XML-RPC query and build Subtitle results.

    In query mode (``'query' in search``) results whose release name does
    not start with the (ASCII-normalized) query string are filtered out.
    Returns an empty list on server errors or when nothing is found.

    NOTE(review): Python 2 code — ``sorted(iterable, cmp_func)`` uses the
    legacy cmp argument, and ``unicode`` is the py2 text type.
    """
    self.logger.debug(u'Query uses token %s and search parameters %s'
                      % (token, search))
    try:
        results = self.server.SearchSubtitles(token, [search])
    except Exception:
        self.logger.debug(u'Cannot query the server')
        return []
    if not results['data']:  # no subtitle found
        return []
    sublinks = []
    self.filename = self.getFileName(filepath)
    # cmp-style sort by subtitle file name (Python 2 only).
    for r in sorted(results['data'], self._cmpSubFileName):
        result = Subtitle(filepath,
                          self.getSubtitlePath(
                              filepath,
                              self.getRevertLanguage(r['SubLanguageID'])),
                          self.__class__.__name__,
                          self.getRevertLanguage(r['SubLanguageID']),
                          r['SubDownloadLink'], r['SubFileName'])
        if 'query' in search:  # query mode search, filter results
            query_encoded = search['query']
            if isinstance(query_encoded, unicode):
                # Strip accents so the comparison is plain-ASCII.
                query_encoded = unicodedata.normalize(
                    'NFKD', query_encoded).encode('ascii', 'ignore')
            if not r['MovieReleaseName'].replace('.', ' ').lower().startswith(query_encoded):
                self.logger.debug(
                    u'Skipping %s it does not start with %s'
                    % (r['MovieReleaseName'].replace('.', ' ').lower(),
                       query_encoded))
                continue
        sublinks.append(result)
    return sublinks
def query(self, name, season, episode, release_group, filepath, languages):
    """Scrape the provider's episode page for completed subtitles.

    Filters table rows by release team (must intersect ``release_group``),
    language (must be in ``languages``) and a 'Completed' status, and
    returns Subtitle results sorted with ``self._cmpReleaseGroup``.

    NOTE(review): Python 2 code (``unicode``, ``urllib2``); network errors
    return an empty list.
    """
    sublinks = []
    # Site URLs use underscores instead of spaces in the series name.
    searchname = name.lower().replace(' ', '_')
    if isinstance(searchname, unicode):
        searchname = searchname.encode('utf-8')
    searchurl = '%s/serie/%s/%s/%s/' % (self.server_url,
                                        urllib2.quote(searchname),
                                        season, episode)
    self.logger.debug(u'Searching in %s' % searchurl)
    try:
        req = urllib2.Request(searchurl,
                              headers={'User-Agent': self.user_agent})
        page = urllib2.urlopen(req, timeout=self.timeout)
    except urllib2.HTTPError as inst:
        self.logger.info(u'Error: %s - %s' % (searchurl, inst))
        return []
    except urllib2.URLError as inst:
        self.logger.info(u'TimeOut: %s' % inst)
        return []
    soup = BeautifulSoup(page.read())
    for subs in soup('td', {'class': 'NewsTitle'}):
        # Extract the release team name and split it on common separators.
        sub_teams = self.listTeams(
            [self.release_pattern.search('%s' % subs.contents[1]).group(1).lower()],
            ['.', '_', ' ', '/', '-'])
        if not release_group.intersection(sub_teams):  # On wrong team
            continue
        self.logger.debug(u'Team from website: %s' % sub_teams)
        self.logger.debug(u'Team from file: %s' % release_group)
        for html_language in subs.parent.parent.findAll('td', {'class': 'language'}):
            sub_language = self.getRevertLanguage(html_language.string.strip())
            self.logger.debug(u'Subtitle reverted language: %s' % sub_language)
            if not sub_language in languages:  # On wrong language
                continue
            html_status = html_language.findNextSibling('td')
            sub_status = html_status.find('strong').string.strip()
            if not sub_status == 'Completed':  # On not completed subtitles
                continue
            sub_link = html_status.findNext('td').find('a')['href']
            result = Subtitle(filepath,
                              self.getSubtitlePath(filepath, sub_language),
                              self.__class__.__name__, sub_language,
                              self.server_url + sub_link,
                              keywords=sub_teams)
            sublinks.append(result)
    # Python 2 cmp-style sort: best release-group matches first.
    sublinks.sort(self._cmpReleaseGroup)
    return sublinks
def query(self, name, season, episode, release_group, filepath, languages):
    """Scrape a Spanish-language provider page for completed subtitles.

    Looks up ``<server>/<name>/<season>x<episode>`` (name hyphenated and
    ASCII-normalized), keeps versions whose release team intersects
    ``release_group``, whose language is in ``languages`` and whose status
    is 'Completado', and returns Subtitle results sorted with
    ``self._cmpReleaseGroup``.

    NOTE(review): Python 2 code (``unicode``, ``urllib2``); network errors
    return an empty list.
    """
    sublinks = []
    # Site URLs use hyphens instead of spaces in the series name.
    searchname = name.lower().replace(' ', '-')
    if isinstance(searchname, unicode):
        # Strip accents so the URL is plain-ASCII.
        searchname = unicodedata.normalize('NFKD', searchname).encode('ascii', 'ignore')
    searchurl = '%s/%s/%sx%.2d' % (self.server_url,
                                   urllib2.quote(searchname),
                                   season, episode)
    self.logger.debug(u'Searching in %s' % searchurl)
    try:
        req = urllib2.Request(searchurl,
                              headers={'User-Agent': self.user_agent})
        page = urllib2.urlopen(req, timeout=self.timeout)
    except urllib2.HTTPError as inst:
        self.logger.info(u'Error: %s - %s' % (searchurl, inst))
        return []
    except urllib2.URLError as inst:
        self.logger.info(u'TimeOut: %s' % inst)
        return []
    soup = BeautifulSoup(page.read())
    for subs in soup('div', {'id': 'version'}):
        version = subs.find('p', {'class': 'title-sub'})
        # Extract the release team name and split it on common separators.
        sub_teams = self.listTeams(
            [self.release_pattern.search('%s' % version.contents[1]).group(1).lower()],
            ['.', '_', ' ', '/', '-'])
        self.logger.debug(u'Team from website: %s' % sub_teams)
        self.logger.debug(u'Team from file: %s' % release_group)
        if not release_group.intersection(sub_teams):  # On wrong team
            continue
        for html_language in subs.findAllNext('ul', {'class': 'sslist'}):
            sub_language = self.getRevertLanguage(
                html_language.findNext('li', {'class': 'li-idioma'})
                .find('strong').contents[0].string.strip())
            if not sub_language in languages:  # On wrong language
                continue
            html_status = html_language.findNext('li', {'class': 'li-estado green'})
            sub_status = html_status.contents[0].string.strip()
            if not sub_status == 'Completado':  # On not completed subtitles
                continue
            sub_link = html_status.findNext('span', {'class': 'descargar green'}).find('a')['href']
            result = Subtitle(filepath,
                              self.getSubtitlePath(filepath, sub_language),
                              self.__class__.__name__, sub_language,
                              sub_link, keywords=sub_teams)
            sublinks.append(result)
    # Python 2 cmp-style sort: best release-group matches first.
    sublinks.sort(self._cmpReleaseGroup)
    return sublinks
def test_subtitle_is_valid_no_content():
    """A subtitle without content must not validate."""
    sub = Subtitle(Language('fra'))
    assert sub.is_valid() is False
def test_subtitle_guess_encoding_utf8():
    """Plain ASCII content is guessed as utf-8 and decodes to text."""
    sub = Subtitle(Language('zho'), False, None, None)
    sub.content = b'Something here'
    assert sub.guess_encoding() == 'utf-8'
    assert isinstance(sub.text, six.text_type)
def test_subtitle_valid_encoding():
    """A known codec alias is normalized to its canonical name."""
    sub = Subtitle(Language('deu'), False, None, 'windows-1252')
    assert sub.encoding == 'cp1252'
def test_subtitle_empty_encoding():
    """Passing no encoding leaves the attribute unset."""
    sub = Subtitle(Language('deu'), False, None, None)
    assert sub.encoding is None
def test_subtitle_invalid_encoding():
    """An unrecognized codec name is discarded rather than stored."""
    sub = Subtitle(Language('deu'), False, None, 'rubbish')
    assert sub.encoding is None
def test_subtitle_text_no_content():
    """Without content there is no decoded text."""
    sub = Subtitle(Language('eng'))
    assert sub.text is None
def test_subtitle_text():
    """ASCII bytes decode to the matching text string."""
    sub = Subtitle(Language('eng'))
    sub.content = b'Some ascii text'
    assert sub.text == 'Some ascii text'
def create(language, **kwargs):
    """Build a Subtitle for an opensubtitles language code and patch it.

    Relies on ``monkeypatch`` and ``_patch_object`` from the enclosing scope.
    """
    sub = Subtitle(Language.fromopensubtitles(language))
    return _patch_object(monkeypatch, sub, **kwargs)