def parse_results(self, raw_xml):
    """Parse a search-result XML document into a list of ``Movie`` objects.

    Only the first ``<opensubtitles>`` element that contains ``<results>``
    is examined, and inside it only the ``<subtitle>`` elements of the
    first ``<results>`` element that has any.  Every ``<subtitle>`` is
    turned into a dict such as::

        {'IDSubtitle': {'IDSubtitle': ..., 'Link': ..., 'uuid': ...},
         'MovieID': {'MovieID': ..., 'Link': ..., 'LinkImdb': ...},
         'MovieName': 'foo_movie', 'LinkUseNext': 'foo', ...}

    which is passed to ``Movie(...)``.  A ``SubtitleFile`` object is filled
    in alongside the dict and attached to the movie's ``subtitles`` list
    when it carries extra info.  Entries whose ``MovieId`` equals that of
    an already collected movie only contribute their subtitle to it.

    :param raw_xml: XML document handed to ``minidom.parseString``.
    :return: list of ``Movie`` objects; ``[]`` when no ``<subtitle>``
        element is found; ``None`` as soon as an entry containing an
        ``<EpisodeName>`` element is met.
    :raises AttributeError: when one of ``IDSubtitle``, ``IDSubtitleFile``,
        ``UserID``, ``MovieID`` or ``ISO639`` is present but empty (only
        ``IndexError`` is swallowed per entry).
    """

    def find_first(parent, tag):
        """Return the first descendant of *parent* named *tag*, else None."""
        nodes = parent.getElementsByTagName(tag)
        return nodes[0] if nodes else None

    def copy_text(parent, tag, target):
        """Store the text of the first non-empty <tag> in target[tag].

        Returns True when a value was stored, False when the element is
        missing or has no child node.
        """
        node = find_first(parent, tag)
        if node is None or not node.firstChild:
            return False
        target[tag] = node.firstChild.data
        return True

    def describe(parent, tag, attributes=()):
        """Return {tag: text, attr: value, ...} for the first <tag>, else None.

        The text is read unconditionally, so an empty element raises
        AttributeError exactly like the inline code it replaces.
        """
        node = find_first(parent, tag)
        if node is None:
            return None
        details = {tag: node.firstChild.data}
        for attribute in attributes:
            details[attribute] = node.getAttribute(attribute)
        return details

    dom = minidom.parseString(raw_xml)
    result_entries = []

    # Locate the <subtitle> nodes: stop at the first <opensubtitles> element
    # that has <results>, whether or not a <subtitle> was found inside it.
    data = None
    for root in dom.getElementsByTagName('opensubtitles'):
        results = root.getElementsByTagName('results')
        if results:
            for result in results:
                subtitle_nodes = result.getElementsByTagName('subtitle')
                if subtitle_nodes:
                    data = subtitle_nodes
                    break
            break
    if not data:
        return []

    for entry in data:
        try:
            sub_obj = subtitlefile.SubtitleFile(online=True)
            sub = {}

            # NOTE(review): this aborts the whole parse and returns None,
            # discarding movies collected so far -- confirm callers rely
            # on that before changing it.
            if entry.getElementsByTagName('EpisodeName'):
                return

            details = describe(entry, 'IDSubtitle',
                               ('Link', 'LinkImdb', 'DownloadLink', 'uuid'))
            if details is not None:
                sub['IDSubtitle'] = details
                sub_obj.setIdOnline(details['IDSubtitle'])

            details = describe(entry, 'IDSubtitleFile')
            if details is not None:
                sub['IDSubtitleFile'] = details
                sub_obj.setIdFileOnline(details['IDSubtitleFile'])

            details = describe(entry, 'UserID', ('Link',))
            if details is not None:
                sub['UserID'] = details

            if copy_text(entry, 'UserNickName', sub):
                sub_obj._uploader = sub['UserNickName']

            details = describe(entry, 'MovieID', ('Link', 'LinkImdb'))
            if details is not None:
                sub['MovieID'] = details

            for tag in ('MovieThumb', 'LinkUseNext', 'LinkZoozle',
                        'LinkTorrentbar', 'LinkBoardreader', 'MovieName',
                        'MovieYear'):
                copy_text(entry, tag, sub)

            # An empty rating defaults to 0.  A *missing* <MovieImdbRating>
            # raises IndexError here, which skips the entry via the handler
            # below (behaviour inherited from the original code).
            rating_child = entry.getElementsByTagName(
                'MovieImdbRating')[0].firstChild
            sub['MovieImdbRating'] = rating_child.data if rating_child else 0

            copy_text(entry, 'MovieImdbID', sub)

            comment_node = find_first(entry, 'SubAuthorComment')
            if comment_node is not None:
                # Falls back to the child node itself (None for an empty
                # element) when it has no ``data`` attribute.
                comment_child = comment_node.firstChild
                sub['SubAuthorComment'] = getattr(
                    comment_child, 'data', comment_child)

            details = describe(entry, 'ISO639', ('LinkSearch', 'flag'))
            if details is not None:
                sub['ISO639'] = details
                sub_obj.setLanguage(Language.from_xx(details['ISO639']))
                # Downloading requires the subtitle ID, not the subtitle
                # file ID.
                sub_obj.setExtraInfo(
                    'downloadLink',
                    "http://www.opensubtitles.org/download/sub/%s"
                    % sub_obj.getIdOnline())

            copy_text(entry, 'LanguageName', sub)
            if copy_text(entry, 'SubFormat', sub):
                sub_obj.setExtraInfo('format', sub['SubFormat'])
            if copy_text(entry, 'SubSumCD', sub):
                sub_obj.setExtraInfo('totalCDs', sub['SubSumCD'])
            copy_text(entry, 'SubAddDate', sub)
            copy_text(entry, 'SubBad', sub)
            if copy_text(entry, 'SubRating', sub):
                sub_obj.setRating(sub['SubRating'])
            if copy_text(entry, 'SubDownloadsCnt', sub):
                sub_obj.setExtraInfo('totalDownloads', sub['SubDownloadsCnt'])
            for tag in ('SubMovieAka', 'SubDate', 'SubComments',
                        'TotalSubs', 'Newest'):
                copy_text(entry, tag, sub)

            if sub:
                temp_movie = Movie(sub)
                has_extra_info = (hasattr(sub_obj, "_extraInfo")
                                  and sub_obj._extraInfo)
                movie_exists = False
                for movie in result_entries:
                    if movie.MovieId == temp_movie.MovieId:
                        movie_exists = True
                        if has_extra_info:
                            movie.subtitles.append(sub_obj)
                if not movie_exists:
                    if has_extra_info:
                        temp_movie.subtitles.append(sub_obj)
                    result_entries.append(temp_movie)
        except IndexError:
            # Best effort: an entry lacking an expected element is skipped.
            continue
    return result_entries
def _xml_to_subtitles(self, xml):
    """Build subtitle objects from a provider XML response.

    ``self._extract_subtitle_entries(xml)`` supplies the entry nodes plus
    two counters that are returned untouched.  Entries containing an
    ``ads1`` or ``ads2`` element are skipped.  An entry that raises
    ``AttributeError``, ``IndexError`` or ``ValueError`` while being read
    is logged at warning level and left out.

    :param xml: value forwarded to ``self._extract_subtitle_entries``.
    :return: ``(subtitles, nb_so_far, nb_provider)``, or
        ``(None, None, None)`` when no entries could be extracted.
    """
    entries, nb_so_far, nb_provider = self._extract_subtitle_entries(xml)
    if entries is None:
        return None, None, None

    def first_text(node, tag, fallback):
        """Text of the first <tag> under *node*, or *fallback* if unavailable."""
        try:
            return node.getElementsByTagName(tag)[0].firstChild.data
        except (AttributeError, IndexError):
            return fallback

    parsed = []
    for node in entries:
        try:
            if (node.getElementsByTagName('ads1')
                    or node.getElementsByTagName('ads2')):
                continue

            id_node = node.getElementsByTagName('IDSubtitle')[0]
            online_id = id_node.firstChild.data
            link = 'http://www.opensubtitles.org' + id_node.getAttribute('Link')
            uuid = id_node.getAttribute('uuid')

            file_id = node.getElementsByTagName(
                'IDSubtitleFile')[0].firstChild.data

            user_node = node.getElementsByTagName('UserID')[0]
            user_id = int(user_node.firstChild.data)
            nickname = first_text(node, 'UserNickName', None)

            iso639_node = node.getElementsByTagName('ISO639')[0]
            iso639 = iso639_node.firstChild.data

            extension = first_text(node, 'SubFormat', 'srt')

            # The upload date is read from the ``locale`` attribute,
            # formatted as day/month/year hour:minute:second.
            added_text = node.getElementsByTagName(
                'SubAddDate')[0].getAttribute('locale')
            added = datetime.datetime.strptime(added_text, '%d/%m/%Y %H:%M:%S')

            rating = float(
                node.getElementsByTagName('SubRating')[0].firstChild.data)

            language = Language.from_xx(iso639)

            release_name = node.getElementsByTagName(
                'MovieReleaseName')[0].firstChild.data

            # Prefer the nickname; otherwise a non-zero user id; else None.
            uploader = nickname or (str(user_id) if user_id != 0 else None)

            parsed.append(OpenSubtitlesSubtitleFile(
                filename='{}.{}'.format(release_name, extension),
                file_size=None,
                md5_hash=uuid,
                id_online=file_id,
                download_link='http://www.opensubtitles.org/download/sub/{}'.format(
                    online_id),
                link=link,
                uploader=uploader,
                language=language,
                rating=rating,
                age=added))
        except (AttributeError, IndexError, ValueError):
            log.warning('subtitle_entry={}'.format(node.toxml()))
            log.warning('XML entry has invalid format.',
                        exc_info=sys.exc_info())
    return parsed, nb_so_far, nb_provider
def subtitle_info(self, raw_xml):
    """Parse a subtitle-details XML document.

    The ``<Subtitle>`` elements are taken from the first ``<SubBrowse>``
    (having any) of the first ``<opensubtitles>`` element that contains a
    ``<SubBrowse>``.  Each one is flattened into a dict and, in parallel,
    a ``SubtitleFile`` object is populated.

    Side effects: the module-level global ``OnlyLink`` is overwritten for
    every entry that has a ``<SubtitleFile>`` element, and
    ``Link().OneLink(OnlyLink)`` is called once per entry.

    :param raw_xml: XML document handed to ``minidom.parseString``.
    :return: ``[]`` when no ``<Subtitle>`` element is found; otherwise the
        tuple ``(subtitle_entries, sub_obj)`` where ``sub_obj`` is the
        object built for the *last* entry only.
    """
    # NOTE(review): the two return shapes differ (list vs. tuple) -- confirm
    # that callers handle both.
    dom = minidom.parseString(raw_xml)  # Build the DOM from the raw XML
    entries = dom.getElementsByTagName(
        'opensubtitles')  # All <opensubtitles> elements
    subtitle_entries = []  # Container that is filled and returned
    data = None
    for entry in entries:
        if entry.getElementsByTagName('SubBrowse'):
            for result in entry.getElementsByTagName('SubBrowse'):
                if result.getElementsByTagName('Subtitle'):
                    data = result.getElementsByTagName('Subtitle')
                    break
            # Only the first <opensubtitles> with a <SubBrowse> is inspected.
            break
    if not data:
        return []
    # Collect the information of every <Subtitle> element
    for entry in data:
        sub_obj = subtitlefile.SubtitleFile(online=True)
        sub = {}
        if entry.getElementsByTagName('LinkDetails') and entry.getElementsByTagName('LinkDetails')[0].firstChild:
            sub['LinkDetails'] = entry.getElementsByTagName(
                'LinkDetails')[0].firstChild.data
        if entry.getElementsByTagName('IDSubtitle'):
            # Unlike the optional text fields, the text is read without
            # checking firstChild: an empty element raises AttributeError.
            sub['IDSubtitle'] = {'IDSubtitle': entry.getElementsByTagName('IDSubtitle')[0].firstChild.data,
                                 'Link': entry.getElementsByTagName('IDSubtitle')[0].getAttribute('Link'),
                                 }
            sub_obj._onlineId = sub['IDSubtitle']['IDSubtitle']
        if entry.getElementsByTagName('MovieReleaseName') and entry.getElementsByTagName('MovieReleaseName')[0].firstChild:
            sub['MovieReleaseName'] = entry.getElementsByTagName(
                'MovieReleaseName')[0].firstChild.data
        if entry.getElementsByTagName('SubFormat') and entry.getElementsByTagName('SubFormat')[0].firstChild:
            sub['SubFormat'] = entry.getElementsByTagName(
                'SubFormat')[0].firstChild.data
            sub_obj.setExtraInfo('format', sub['SubFormat'])
        if entry.getElementsByTagName('SubSumCD') and entry.getElementsByTagName('SubSumCD')[0].firstChild:
            sub['SubSumCD'] = entry.getElementsByTagName(
                'SubSumCD')[0].firstChild.data
            sub_obj.setExtraInfo('totalCDs', sub['SubSumCD'])
        if entry.getElementsByTagName('SubAuthorComment') and entry.getElementsByTagName('SubAuthorComment')[0].firstChild:
            sub['SubAuthorComment'] = entry.getElementsByTagName(
                'SubAuthorComment')[0].firstChild.data
        if entry.getElementsByTagName('SubAddDate') and entry.getElementsByTagName('SubAddDate')[0].firstChild:
            sub['SubAddDate'] = entry.getElementsByTagName(
                'SubAddDate')[0].firstChild.data
        if entry.getElementsByTagName('SubSumVotes') and entry.getElementsByTagName('SubSumVotes')[0].firstChild:
            sub['SubSumVotes'] = entry.getElementsByTagName(
                'SubSumVotes')[0].firstChild.data
        if entry.getElementsByTagName('SubRating') and entry.getElementsByTagName('SubRating')[0].firstChild:
            sub['SubRating'] = entry.getElementsByTagName(
                'SubRating')[0].firstChild.data
            sub_obj.setRating(sub['SubRating'])
        if entry.getElementsByTagName('SubDownloadsCnt') and entry.getElementsByTagName('SubDownloadsCnt')[0].firstChild:
            sub['SubDownloadsCnt'] = entry.getElementsByTagName(
                'SubDownloadsCnt')[0].firstChild.data
            sub_obj.setExtraInfo('totalDownloads', sub['SubDownloadsCnt'])
        if entry.getElementsByTagName('UserNickName') and entry.getElementsByTagName('UserNickName')[0].firstChild:
            sub['UserNickName'] = entry.getElementsByTagName(
                'UserNickName')[0].firstChild.data
            sub_obj._uploader = sub['UserNickName']
        if entry.getElementsByTagName('LanguageName') and entry.getElementsByTagName('LanguageName')[0].firstChild:
            sub['LanguageName'] = entry.getElementsByTagName(
                'LanguageName')[0].firstChild.data
            # The language code comes from the ISO639 *attribute* of
            # <LanguageName>, not from a separate element.
            sub_obj.setLanguage(Language.from_xx(
                entry.getElementsByTagName('LanguageName')[0].getAttribute('ISO639')))
        if entry.getElementsByTagName('SubtitleFile'):
            SubtitleFile = {}
            _SubtitleFile = entry.getElementsByTagName('SubtitleFile')[0]
            _File = _SubtitleFile.getElementsByTagName('File')[0]
            # NOTE(review): 'SubSize' is filled from the 'Link' attribute --
            # looks like a copy/paste slip; confirm against the XML schema.
            SubtitleFile['File'] = {'ID': _SubtitleFile.getElementsByTagName('File')[0].getAttribute('ID'),
                                    'SubActualCD': {'SubActualCD': _File.getElementsByTagName('SubActualCD')[0].firstChild.data,
                                                    'SubSize': _File.getElementsByTagName('SubActualCD')[0].getAttribute('Link'),
                                                    'MD5': _File.getElementsByTagName('SubActualCD')[0].getAttribute('MD5'),
                                                    'SubFileName': _File.getElementsByTagName('SubActualCD')[0].getAttribute('SubFileName'),
                                                    'DownloadLink': _File.getElementsByTagName('SubActualCD')[0].getAttribute('DownloadLink'),
                                                    }
                                    }
            SubtitleFile['Download'] = {'Download': _SubtitleFile.getElementsByTagName('Download')[0].firstChild.data,
                                        'DownloadLink': _SubtitleFile.getElementsByTagName('Download')[0].getAttribute('LinkDownloadBundle'),
                                        }
            sub['SubtitleFile'] = SubtitleFile
            # The bundle link is published through a module-level global.
            global OnlyLink
            OnlyLink = _SubtitleFile.getElementsByTagName(
                'Download')[0].getAttribute('LinkDownloadBundle')
            # Three plain substring replacements applied in sequence; each
            # one replaces *every* occurrence in the URL, not only the
            # host/path part it is presumably aimed at -- TODO confirm.
            OnlyLink = ((OnlyLink.replace('dl', 'www')).replace(
                'org/en', 'com')).replace('subb', 'sub')
        if entry.getElementsByTagName('Movie'):
            _Movie = entry.getElementsByTagName('Movie')[0]
            # Movie id and link are attributes of <MovieName>.
            sub['MovieID'] = {'MovieID': _Movie.getElementsByTagName('MovieName')[0].getAttribute('MovieID'),
                              'Link': _Movie.getElementsByTagName('MovieName')[0].getAttribute('Link'),
                              }
            # The IMDb link is the <link_detail> of the <info> whose
            # <web_url> is exactly "http://www.imdb.com", searched in the
            # sections of type "about".
            for section in _Movie.getElementsByTagName('section'):
                if section.getAttribute('type') == u"about":
                    for info in section.getElementsByTagName("info"):
                        if info.getElementsByTagName("web_url")[0].firstChild.data == u"http://www.imdb.com":
                            sub['MovieID']['LinkImdb'] = info.getElementsByTagName(
                                "link_detail")[0].firstChild.data
        if entry.getElementsByTagName('FullName') and entry.getElementsByTagName('FullName')[0].firstChild:
            sub['FullName'] = entry.getElementsByTagName(
                'FullName')[0].firstChild.data
        if entry.getElementsByTagName('ReportLink') and entry.getElementsByTagName('ReportLink')[0].firstChild:
            sub['ReportLink'] = entry.getElementsByTagName(
                'ReportLink')[0].firstChild.data
        # Shortcut to the nested download link.
        # NOTE(review): raises KeyError when the entry had no <SubtitleFile>.
        sub['DownloadLink'] = sub['SubtitleFile']['File']['SubActualCD']['DownloadLink']
        # NOTE(review): OnlyLink is only assigned in the <SubtitleFile>
        # branch above; otherwise this uses whatever the global held before
        # (or raises NameError if it was never set).
        Link().OneLink(OnlyLink)
        if sub:
            subtitle_entries.append(sub)
    return (subtitle_entries, sub_obj)
def translators_get():
    """Return a one-element tuple with the ``Translator`` for Sylvestre Ledru.

    The translator is created with a one-element tuple holding the language
    obtained from ``Language.from_xx('fr')``.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid an import cycle -- TODO confirm).
    from subdownloader.languages.language import Language

    translated_languages = (Language.from_xx('fr'),)
    translator = Translator(
        'Sylvestre Ledru', '*****@*****.**', translated_languages)
    return (translator,)